Пример #1
0
static int feature_node_lua_remove_leaf(lua_State *L)
{
  GtGenomeNode **parent, **leaf;
  GtFeatureNode *pf, *lf;
  parent = check_genome_node(L, 1);
  leaf  = check_genome_node(L, 2);
  pf = gt_feature_node_try_cast(*parent);
  luaL_argcheck(L, pf, 1, "not a feature node");
  lf = gt_feature_node_try_cast(*leaf);
  luaL_argcheck(L, lf, 2, "not a feature node");
  gt_feature_node_remove_leaf(pf, lf);
  return 0;
}
Пример #2
0
static int genome_node_lua_add_child(lua_State *L)
{
  GtGenomeNode **parent, **child;
  GtFeatureNode *pf, *cf;
  parent = check_genome_node(L, 1);
  child  = check_genome_node(L, 2);
  pf = gt_feature_node_try_cast(*parent);
  luaL_argcheck(L, pf, 1, "not a feature node");
  cf = gt_feature_node_try_cast(*child);
  luaL_argcheck(L, cf, 2, "not a feature node");
  gt_feature_node_add_child(pf, (GtFeatureNode*)
                                gt_genome_node_ref((GtGenomeNode*) cf));
  return 0;
}
static int md5_to_seqid(GtGenomeNode *gn, GtRegionMapping *region_mapping,
                        GtError *err)
{
  GtStr *seqid;
  int had_err = 0;
  gt_error_check(err);
  gt_assert(gn && region_mapping);
  seqid = gt_genome_node_get_seqid(gn);
  if (gt_md5_seqid_has_prefix(gt_str_get(seqid))) {
    /* seqid is a MD5 seqid -> change id */
    GtStr *desc = gt_str_new();
    had_err = gt_region_mapping_get_description(region_mapping, desc, seqid,
                                                err);
    if (!had_err) {
      GtStr *new_seqid = gt_str_new();
      gt_regular_seqid_save(new_seqid, desc);
      if (gt_feature_node_try_cast(gn)) {
        M2IChangeSeqidInfo info;
        info.new_seqid = new_seqid;
        info.region_mapping = region_mapping;
        had_err = gt_feature_node_traverse_children((GtFeatureNode*) gn, &info,
                                                    m2i_change_seqid, true,
                                                    err);
      }
      else
        gt_genome_node_change_seqid(gn, new_seqid);
      gt_str_delete(new_seqid);
    }
    gt_str_delete(desc);
  }
  return had_err;
}
Пример #4
0
static int feature_node_lua_extract_sequence(lua_State *L)
{
  GtGenomeNode **gn;
  GtFeatureNode *fn;
  const char *type;
  bool join;
  GtRegionMapping **region_mapping;
  GtStr *sequence;
  GtError *err;
  gn = check_genome_node(L, 1);
  /* make sure we get a feature node */
  fn = gt_feature_node_try_cast(*gn);
  luaL_argcheck(L, fn, 1, "not a feature node");
  type = luaL_checkstring(L, 2);
  join = lua_toboolean(L, 3);
  region_mapping = check_region_mapping(L, 4);
  err = gt_error_new();
  sequence = gt_str_new();
  if (gt_extract_feature_sequence(sequence, *gn, type, join, NULL, NULL,
                                  *region_mapping, err)) {
    gt_str_delete(sequence);
    return gt_lua_error(L, err);
  }
  if (gt_str_length(sequence))
    lua_pushstring(L, gt_str_get(sequence));
  else
    lua_pushnil(L);
  gt_str_delete(sequence);
  gt_error_delete(err);
  return 1;
}
Пример #5
0
static int feature_node_lua_get_source(lua_State *L)
{
  GtGenomeNode **gn = check_genome_node(L, 1);
  GtFeatureNode *fn;
  /* make sure we get a feature node */
  fn = gt_feature_node_try_cast(*gn);
  luaL_argcheck(L, fn, 1, "not a feature node");
  lua_pushstring(L, gt_feature_node_get_source(fn));
  return 1;
}
Пример #6
0
static int filter_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                              GtError *error)
{
  AgnFilterStream *stream;
  GtFeatureNode *fn;
  int had_err;
  gt_error_check(error);
  stream = filter_stream_cast(ns);

  if(gt_queue_size(stream->cache) > 0)
  {
    *gn = gt_queue_get(stream->cache);
    return 0;
  }

  while(1)
  {
    had_err = gt_node_stream_next(stream->in_stream, gn, error);
    if(had_err)
      return had_err;
    if(!*gn)
      return 0;

    fn = gt_feature_node_try_cast(*gn);
    if(!fn)
      return 0;

    GtFeatureNode *current;
    GtFeatureNodeIterator *iter = gt_feature_node_iterator_new(fn);
    for(current  = gt_feature_node_iterator_next(iter);
        current != NULL;
        current  = gt_feature_node_iterator_next(iter))
    {
      const char *type = gt_feature_node_get_type(current);
      bool keepfeature = false;
      if(gt_hashmap_get(stream->typestokeep, type) != NULL)
        keepfeature = true;

      if(keepfeature)
      {
        gt_genome_node_ref((GtGenomeNode *)current);
        gt_queue_add(stream->cache, current);
      }
    }
    gt_feature_node_iterator_delete(iter);
    gt_genome_node_delete((GtGenomeNode *)fn);
    if(gt_queue_size(stream->cache) > 0)
    {
      *gn = gt_queue_get(stream->cache);
      return 0;
    }
  }

  return 0;
}
Пример #7
0
static int feature_node_lua_output_leading(lua_State *L)
{
  GtGenomeNode **gn;
  GtFeatureNode *fn;
  gn = check_genome_node(L, 1);
  /* make sure we get a feature node */
  fn = gt_feature_node_try_cast(*gn);
  luaL_argcheck(L, fn, 1, "not a feature node");
  gt_gff3_output_leading(fn, NULL);
  return 0;
}
Пример #8
0
static int feature_node_lua_get_strand(lua_State *L)
{
  GtGenomeNode **gn = check_genome_node(L, 1);
  GtFeatureNode *fn;
  char strand_char[2];
  /* make sure we get a feature node */
  fn = gt_feature_node_try_cast(*gn);
  luaL_argcheck(L, fn, 1, "not a feature node");
  strand_char[0] = GT_STRAND_CHARS[gt_feature_node_get_strand(fn)];
  strand_char[1] = '\0';
  lua_pushstring(L, strand_char);
  return 1;
}
Пример #9
0
static int feature_node_lua_get_score(lua_State *L)
{
  GtGenomeNode **gn = check_genome_node(L, 1);
  GtFeatureNode *fn;
  /* make sure we get a feature node */
  fn = gt_feature_node_try_cast(*gn);
  luaL_argcheck(L, fn, 1, "not a feature node");
  if (gt_feature_node_score_is_defined(fn))
    lua_pushnumber(L, gt_feature_node_get_score(fn));
  else
    lua_pushnil(L);
  return 1;
}
Пример #10
0
static int feature_node_lua_set_source(lua_State *L)
{
  const char *source;
  GtStr *source_str;
  GtGenomeNode **gn = check_genome_node(L, 1);
  GtFeatureNode *fn;
  /* make sure we get a feature node */
  fn = gt_feature_node_try_cast(*gn);
  luaL_argcheck(L, fn, 1, "not a feature node");
  source = luaL_checkstring(L, 2);
  source_str = gt_str_new_cstr(source);
  gt_feature_node_set_source(fn, source_str);
  gt_str_delete(source_str);
  return 0;
}
Пример #11
0
static int feature_node_lua_get_attribute(lua_State *L)
{
  GtGenomeNode **gn = check_genome_node(L, 1);
  const char *attr = NULL, *attrval = NULL;
  attr = luaL_checkstring(L, 2);
  GtFeatureNode *fn;
  /* make sure we get a feature node */
  fn = gt_feature_node_try_cast(*gn);
  luaL_argcheck(L, fn, 1, "not a feature node");
  attrval = gt_feature_node_get_attribute(fn, attr);
  if (attrval)
    lua_pushstring(L, attrval);
  else
    lua_pushnil(L);
  return 1;
}
Пример #12
0
static int gt_array_out_stream_next(GtNodeStream *gs, GtGenomeNode **gn,
                                    GtError *err)
{
  GtArrayOutStream *aos;
  GtGenomeNode *node, *gn_ref;
  int had_err = 0;

  gt_error_check(err);
  aos = gt_array_out_stream_cast(gs);
  had_err = gt_node_stream_next(aos->in_stream, gn, err);

  if (!had_err && *gn) {
    if ((node = gt_feature_node_try_cast(*gn))) {
      gn_ref = gt_genome_node_ref(*gn);
      gt_array_add(aos->nodes, gn_ref);
    }
  }

  return had_err;
}
Пример #13
0
static int feature_node_lua_get_exons(lua_State *L)
{
  GtGenomeNode **gn = check_genome_node(L, 1);
  GtArray *exons = gt_array_new(sizeof (GtGenomeNode*));
  GtUword i = 0;
  GtFeatureNode *fn;
  /* make sure we get a feature node */
  fn = gt_feature_node_try_cast(*gn);
  luaL_argcheck(L, fn, 1, "not a feature node");
  gt_feature_node_get_exons(fn, exons);
  lua_newtable(L);
  for (i = 0; i < gt_array_size(exons); i++) {
    lua_pushnumber(L, i+1);
    gt_lua_genome_node_push(L, (GtGenomeNode*)
                            gt_genome_node_ref(*(GtGenomeNode**)
                                               gt_array_get(exons, i)));
    lua_rawset(L, -3);
  }
  gt_array_delete(exons);
  return 1;
}
static int targetbest_filter_stream_next(GtNodeStream *gs, GtGenomeNode **gn,
                                         GtError *err)
{
  GtTargetbestFilterStream *tfs;
  GtGenomeNode *node;
  int had_err = 0;
  gt_error_check(err);
  tfs = targetbest_filter_stream_cast(gs);

  if (!tfs->in_stream_processed) {
    while (!(had_err = gt_node_stream_next(tfs->in_stream, &node, err)) &&
           node) {
      if (gt_feature_node_try_cast(node) &&
          gt_feature_node_get_attribute((GtFeatureNode*) node, "Target")) {
        filter_targetbest((GtFeatureNode*) node, tfs->trees,
                          tfs->target_to_elem);
      }
      else
        gt_dlist_add(tfs->trees, node);
    }
    tfs->next = gt_dlist_first(tfs->trees);
    tfs->in_stream_processed = true;
  }

  if (!had_err) {
    gt_assert(tfs->in_stream_processed);
    if (tfs->next) {
      *gn = gt_dlistelem_get_data(tfs->next);
      tfs->next = gt_dlistelem_next(tfs->next);
    }
    else
      *gn = NULL;
    return 0;
  }

  return had_err;
}
static int gt_seqpos_classifier_next_fn(GtSeqposClassifier *seqpos_classifier,
    GtError *err)
{
  int had_err = 0;
  gt_assert(seqpos_classifier != NULL);
  if (seqpos_classifier->fni != NULL)
  {
    gt_feature_node_iterator_delete(seqpos_classifier->fni);
    seqpos_classifier->fni = NULL;
  }
  while (true)
  {
    if (seqpos_classifier->gn != NULL)
    {
      gt_genome_node_delete(seqpos_classifier->gn);
    }
    had_err = gt_node_stream_next(seqpos_classifier->annotation_stream,
        &seqpos_classifier->gn, err);
    if (had_err != 0 || seqpos_classifier->gn == NULL)
    {
      seqpos_classifier->fn = NULL;
      seqpos_classifier->gn = NULL;
      return had_err;
    }
    else
    {
      if ((seqpos_classifier->fn =
            gt_feature_node_try_cast(seqpos_classifier->gn)) != NULL)
      {
        seqpos_classifier->fni =
          gt_feature_node_iterator_new(seqpos_classifier->fn);
        return had_err;
      }
    }
  }
}
static int snp_annotator_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                                     GtError *err)
{
  GtSNPAnnotatorStream *sas;
  int had_err = 0;
  bool complete_cluster = false;
  GtGenomeNode *mygn = NULL;
  GtFeatureNode *fn = NULL;
  const char *snv_type = gt_symbol(gt_ft_SNV),
             *snp_type = gt_symbol(gt_ft_SNP),
             *gene_type = gt_symbol(gt_ft_gene);
  gt_error_check(err);
  sas = gt_snp_annotator_stream_cast(ns);

  /* if there are still SNPs left in the buffer, output them */
  if (gt_queue_size(sas->outqueue) > 0) {
    *gn = (GtGenomeNode*) gt_queue_get(sas->outqueue);
    return had_err;
  } else complete_cluster = false;

  while (!had_err && !complete_cluster) {
    had_err = gt_node_stream_next(sas->merge_stream, &mygn, err);

    /* stop if stream is at the end */
    if (had_err || !mygn) break;

    /* process all feature nodes */
    if ((fn = gt_feature_node_try_cast(mygn))) {
      GtGenomeNode *addgn;
      const char *type = gt_feature_node_get_type(fn);
      GtRange new_rng = gt_genome_node_get_range(mygn);
      if (type == snv_type || type == snp_type) {
        /* -----> this is a SNP <----- */
        if (gt_range_overlap(&new_rng, &sas->cur_gene_range)) {
          /* it falls into the currently observed range */
          gt_queue_add(sas->snps, gt_genome_node_ref((GtGenomeNode*) fn));
        } else {
          /* SNP outside a gene, this cluster is done
             add to out queue and start serving */
          gt_assert(gt_queue_size(sas->outqueue) == 0);
          had_err = snp_annotator_stream_process_current_gene(sas, err);
          gt_queue_add(sas->outqueue, mygn);
          if (gt_queue_size(sas->outqueue) > 0) {
            *gn = (GtGenomeNode*) gt_queue_get(sas->outqueue);
            complete_cluster = true;
          }
        }
      } else if (type == gene_type) {
        /* -----> this is a gene <----- */
        if (gt_array_size(sas->cur_gene_set) == 0UL) {
          /* new overlapping gene cluster */
          addgn = gt_genome_node_ref(mygn);
          gt_array_add(sas->cur_gene_set, addgn);
          sas->cur_gene_range = gt_genome_node_get_range(mygn);
        } else {
          if (gt_range_overlap(&new_rng, &sas->cur_gene_range)) {
            /* gene overlaps with current one, add to cluster */
            addgn = gt_genome_node_ref(mygn);
            gt_array_add(sas->cur_gene_set, addgn);
            sas->cur_gene_range = gt_range_join(&sas->cur_gene_range, &new_rng);
          } else {
            /* finish current cluster and start a new one */
            had_err = snp_annotator_stream_process_current_gene(sas, err);
            if (!had_err) {
              addgn = gt_genome_node_ref(mygn);
              gt_array_add(sas->cur_gene_set, addgn);
              sas->cur_gene_range = gt_genome_node_get_range(mygn);
            }
            if (gt_queue_size(sas->outqueue) > 0) {
              *gn = (GtGenomeNode*) gt_queue_get(sas->outqueue);
              complete_cluster = true;
            }
          }
        }
        /* from now on, genes are kept in gene cluster arrays only */
        gt_genome_node_delete(mygn);
      }
    } else {
      /* meta node */
      had_err = snp_annotator_stream_process_current_gene(sas, err);
      if (!had_err) {
        gt_queue_add(sas->outqueue, mygn);
      }
      if (gt_queue_size(sas->outqueue) > 0) {
        *gn = (GtGenomeNode*) gt_queue_get(sas->outqueue);
        complete_cluster = true;
      }
    }
  }

  return had_err;
}
int gt_ltrfileout_stream_next(GtNodeStream *ns, GtGenomeNode **gn, GtError *err)
{
  GtLTRdigestFileOutStream *ls;
  GtFeatureNode *fn;
  GtRange lltr_rng = {GT_UNDEF_UWORD, GT_UNDEF_UWORD},
          rltr_rng = {GT_UNDEF_UWORD, GT_UNDEF_UWORD},
          ppt_rng = {GT_UNDEF_UWORD, GT_UNDEF_UWORD},
          pbs_rng = {GT_UNDEF_UWORD, GT_UNDEF_UWORD};
  int had_err;
  GtUword i=0;

  gt_error_check(err);
  ls = gt_ltrdigest_file_out_stream_cast(ns);

  /* initialize this element */
  memset(&ls->element, 0, sizeof (GtLTRElement));

  /* get annotations from parser */
  had_err = gt_node_stream_next(ls->in_stream, gn, err);
  if (!had_err && *gn)
  {
    GtFeatureNodeIterator* gni;
    GtFeatureNode *mygn;

    /* only process feature nodes */
    if (!(fn = gt_feature_node_try_cast(*gn)))
      return 0;

    ls->element.pdomorder = gt_array_new(sizeof (const char*));

    /* fill LTRElement structure from GFF3 subgraph */
    gni = gt_feature_node_iterator_new(fn);
    for (mygn = fn; mygn != NULL; mygn = gt_feature_node_iterator_next(gni))
      (void) gt_genome_node_accept((GtGenomeNode*) mygn,
                                   (GtNodeVisitor*) ls->lv,
                                   err);
    gt_feature_node_iterator_delete(gni);
  }

  if (!had_err && ls->element.mainnode != NULL)
  {
    char desc[GT_MAXFASTAHEADER];
    GtFeatureNode *ltr3, *ltr5;
    GtStr *sdesc, *sreg, *seq;

    /* find sequence in GtEncseq */
    sreg = gt_genome_node_get_seqid((GtGenomeNode*) ls->element.mainnode);

    sdesc = gt_str_new();
    had_err = gt_region_mapping_get_description(ls->rmap, sdesc, sreg, err);

    if (!had_err) {
      GtRange rng;
      ls->element.seqid = gt_calloc((size_t) ls->seqnamelen+1, sizeof (char));
      (void) snprintf(ls->element.seqid,
                      MIN((size_t) gt_str_length(sdesc),
                          (size_t) ls->seqnamelen)+1,
                      "%s", gt_str_get(sdesc));
      gt_cstr_rep(ls->element.seqid, ' ', '_');
      if (gt_str_length(sdesc) > (GtUword) ls->seqnamelen)
        ls->element.seqid[ls->seqnamelen] = '\0';

      (void) gt_ltrelement_format_description(&ls->element,
                                              ls->seqnamelen,
                                              desc,
                                              (size_t) (GT_MAXFASTAHEADER-1));
      gt_str_delete(sdesc);

      /* output basic retrotransposon data */
      lltr_rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.leftLTR);
      rltr_rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.rightLTR);
      rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.mainnode);
      gt_file_xprintf(ls->tabout_file,
                      GT_WU"\t"GT_WU"\t"GT_WU"\t%s\t"GT_WU"\t"GT_WU"\t"GT_WU"\t"
                      GT_WU"\t"GT_WU"\t"GT_WU"\t",
                      rng.start, rng.end, gt_ltrelement_length(&ls->element),
                      ls->element.seqid, lltr_rng.start, lltr_rng.end,
                      gt_ltrelement_leftltrlen(&ls->element), rltr_rng.start,
                      rltr_rng.end, gt_ltrelement_rightltrlen(&ls->element));
    }
    seq = gt_str_new();

    /* output TSDs */
    if (!had_err && ls->element.leftTSD != NULL)
    {
      GtRange tsd_rng;
      tsd_rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.leftTSD);
      had_err = gt_extract_feature_sequence(seq,
                                       (GtGenomeNode*) ls->element.leftTSD,
                                       gt_symbol(gt_ft_target_site_duplication),
                                       false,
                                       NULL, NULL, ls->rmap, err);
      if (!had_err) {
        gt_file_xprintf(ls->tabout_file,
                         ""GT_WU"\t"GT_WU"\t%s\t",
                         tsd_rng.start,
                         tsd_rng.end,
                         gt_str_get(seq));
      }
    gt_str_reset(seq);
    } else gt_file_xprintf(ls->tabout_file, "\t\t\t");

    if (!had_err && ls->element.rightTSD != NULL)
    {
      GtRange tsd_rng;

      tsd_rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.rightTSD);
      had_err = gt_extract_feature_sequence(seq,
                                       (GtGenomeNode*) ls->element.rightTSD,
                                       gt_symbol(gt_ft_target_site_duplication),
                                       false,
                                       NULL, NULL, ls->rmap, err);
      if (!had_err) {
        gt_file_xprintf(ls->tabout_file,
                           ""GT_WU"\t"GT_WU"\t%s\t",
                           tsd_rng.start,
                           tsd_rng.end,
                           gt_str_get(seq));
      }
      gt_str_reset(seq);
    } else gt_file_xprintf(ls->tabout_file, "\t\t\t");

    /* output PPT */
    if (!had_err && ls->element.ppt != NULL)
    {
      GtStrand ppt_strand = gt_feature_node_get_strand(ls->element.ppt);

      ppt_rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.ppt);
      had_err = gt_extract_feature_sequence(seq,
                                            (GtGenomeNode*) ls->element.ppt,
                                            gt_symbol(gt_ft_RR_tract), false,
                                            NULL, NULL, ls->rmap, err);
      if (!had_err) {
        gt_fasta_show_entry(desc, gt_str_get(seq), gt_range_length(&ppt_rng),
                            GT_FSWIDTH, ls->pptout_file);
        gt_file_xprintf(ls->tabout_file,
                           ""GT_WU"\t"GT_WU"\t%s\t%c\t%d\t",
                           ppt_rng.start,
                           ppt_rng.end,
                           gt_str_get(seq),
                           GT_STRAND_CHARS[ppt_strand],
                           (ppt_strand == GT_STRAND_FORWARD ?
                               abs((int) (rltr_rng.start - ppt_rng.end)) :
                               abs((int) (lltr_rng.end - ppt_rng.start))));
      }
      gt_str_reset(seq);
    } else gt_file_xprintf(ls->tabout_file, "\t\t\t\t\t");

    /* output PBS */
    if (!had_err && ls->element.pbs != NULL)
    {
      GtStrand pbs_strand;

      pbs_strand = gt_feature_node_get_strand(ls->element.pbs);
      pbs_rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.pbs);
      had_err = gt_extract_feature_sequence(seq,
                                           (GtGenomeNode*) ls->element.pbs,
                                           gt_symbol(gt_ft_primer_binding_site),
                                           false, NULL, NULL, ls->rmap, err);
      if (!had_err) {
        gt_fasta_show_entry(desc, gt_str_get(seq), gt_range_length(&pbs_rng),
                            GT_FSWIDTH, ls->pbsout_file);
        gt_file_xprintf(ls->tabout_file,
                         ""GT_WU"\t"GT_WU"\t%c\t%s\t%s\t%s\t%s\t%s\t",
                         pbs_rng.start,
                         pbs_rng.end,
                         GT_STRAND_CHARS[pbs_strand],
                         gt_feature_node_get_attribute(ls->element.pbs, "trna"),
                         gt_str_get(seq),
                         gt_feature_node_get_attribute(ls->element.pbs,
                                                       "pbsoffset"),
                         gt_feature_node_get_attribute(ls->element.pbs,
                                                       "trnaoffset"),
                         gt_feature_node_get_attribute(ls->element.pbs,
                                                       "edist"));
      }
      gt_str_reset(seq);
    } else gt_file_xprintf(ls->tabout_file, "\t\t\t\t\t\t\t\t");

    /* output protein domains */
    if (!had_err && ls->element.pdoms != NULL)
    {
      GtStr *pdomorderstr = gt_str_new();
      for (i=0; !had_err && i<gt_array_size(ls->element.pdomorder); i++)
      {
        const char* key = *(const char**) gt_array_get(ls->element.pdomorder,
                                                       i);
        GtArray *entry = (GtArray*) gt_hashmap_get(ls->element.pdoms, key);
        had_err = write_pdom(ls, entry, key, ls->rmap, desc, err);
      }

      if (GT_STRAND_REVERSE == gt_feature_node_get_strand(ls->element.mainnode))
        gt_array_reverse(ls->element.pdomorder);

      for (i=0 ;!had_err && i<gt_array_size(ls->element.pdomorder); i++)
      {
        const char* name = *(const char**) gt_array_get(ls->element.pdomorder,
                                                        i);
        gt_str_append_cstr(pdomorderstr, name);
        if (i != gt_array_size(ls->element.pdomorder)-1)
          gt_str_append_cstr(pdomorderstr, "/");
      }
      gt_file_xprintf(ls->tabout_file, "%s", gt_str_get(pdomorderstr));
      gt_str_delete(pdomorderstr);
    }

    /* output LTRs (we just expect them to exist) */
    switch (gt_feature_node_get_strand(ls->element.mainnode))
    {
      case GT_STRAND_REVERSE:
        ltr5 = ls->element.rightLTR;
        ltr3 = ls->element.leftLTR;
        break;
      case GT_STRAND_FORWARD:
      default:
        ltr5 = ls->element.leftLTR;
        ltr3 = ls->element.rightLTR;
        break;
    }

    if (!had_err) {
      had_err = gt_extract_feature_sequence(seq, (GtGenomeNode*) ltr5,
                                          gt_symbol(gt_ft_long_terminal_repeat),
                                          false,
                                          NULL, NULL, ls->rmap, err);
    }
    if (!had_err) {
      gt_fasta_show_entry(desc, gt_str_get(seq), gt_str_length(seq),
                          GT_FSWIDTH, ls->ltr5out_file);
      gt_str_reset(seq);
    }
    if (!had_err) {
      had_err = gt_extract_feature_sequence(seq, (GtGenomeNode*) ltr3,
                                          gt_symbol(gt_ft_long_terminal_repeat),
                                          false,
                                          NULL, NULL, ls->rmap, err);
    }
    if (!had_err) {
      gt_fasta_show_entry(desc, gt_str_get(seq), gt_str_length(seq),
                          GT_FSWIDTH, ls->ltr3out_file);
      gt_str_reset(seq);
    }

    /* output complete oriented element */
    if (!had_err) {
      had_err = gt_extract_feature_sequence(seq,
                                           (GtGenomeNode*) ls->element.mainnode,
                                           gt_symbol(gt_ft_LTR_retrotransposon),
                                           false,
                                           NULL, NULL, ls->rmap, err);
    }
    if (!had_err) {
      gt_fasta_show_entry(desc,gt_str_get(seq), gt_str_length(seq),
                          GT_FSWIDTH, ls->elemout_file);
      gt_str_reset(seq);
    }
    gt_file_xprintf(ls->tabout_file, "\n");
    gt_str_delete(seq);
  }
  gt_hashmap_delete(ls->element.pdoms);
  gt_array_delete(ls->element.pdomorder);
  gt_free(ls->element.seqid);
  return had_err;
}
Пример #18
0
static int chseqids_stream_next(GtNodeStream *gs, GtGenomeNode **gn,
                                GtError *err)
{
  GtChseqidsStream *cs;
  GtGenomeNode *node, **gn_a, **gn_b;
  GtFeatureNode *feature_node;
  GtStr *changed_seqid;
  unsigned long i;
  int rval, had_err = 0;
  gt_error_check(err);
  cs = chseqids_stream_cast(gs);

  if (!cs->sequence_regions_processed) {
    while (!had_err) {
      if (!(had_err = gt_node_stream_next(cs->in_stream, &node, err))) {
        if (node)
          gt_array_add(cs->gt_genome_node_buffer, node);
        else
          break;
        if (!gt_region_node_try_cast(node))
          break; /* no more sequence regions */
      }
    }
    /* now the buffer contains only sequence regions (except the last entry)
       -> change sequence ids */
    for (i = 0; !had_err && i < gt_array_size(cs->gt_genome_node_buffer); i++) {
      node = *(GtGenomeNode**) gt_array_get(cs->gt_genome_node_buffer, i);
      if (gt_genome_node_get_seqid(node)) {
        if  ((changed_seqid = gt_mapping_map_string(cs->chseqids_mapping,
                                     gt_str_get(gt_genome_node_get_seqid(node)),
                                                 err))) {
          if ((feature_node = gt_feature_node_try_cast(node))) {
            rval = gt_genome_node_traverse_children(node, changed_seqid,
                                                    change_sequence_id, true,
                                                    err);
            gt_assert(!rval); /* change_sequence_id() is sane */
          }
          else
            gt_genome_node_change_seqid(node, changed_seqid);
          gt_str_delete(changed_seqid);
        }
        else
          had_err = -1;
       }
    }
    /* sort them */
    if (!had_err)
      gt_genome_nodes_sort(cs->gt_genome_node_buffer);
    /* consolidate them */
    for (i = 1; !had_err && i + 1 < gt_array_size(cs->gt_genome_node_buffer);
         i++) {
      gn_a = gt_array_get(cs->gt_genome_node_buffer, i-1);
      gn_b = gt_array_get(cs->gt_genome_node_buffer, i);
      if (gt_genome_nodes_are_equal_region_nodes(*gn_a, *gn_b)) {
        gt_region_node_consolidate(gt_region_node_cast(*gn_b),
                                   gt_region_node_cast(*gn_a));
        gt_genome_node_delete(*gn_a);
        *gn_a = NULL;
      }
    }
    cs->sequence_regions_processed = true;
  }

  /* return non-null nodes from buffer */
  while (!had_err &&
         cs->buffer_index < gt_array_size(cs->gt_genome_node_buffer)) {
    node = *(GtGenomeNode**) gt_array_get(cs->gt_genome_node_buffer,
                                           cs->buffer_index);
    cs->buffer_index++;
    if (node) {
      *gn = node;
      return had_err;
    }
  }

  if (!had_err)
    had_err = gt_node_stream_next(cs->in_stream, gn, err);
  if (!had_err && *gn) {
    if (gt_genome_node_get_seqid(*gn)) {
      changed_seqid = gt_mapping_map_string(cs->chseqids_mapping,
                                      gt_str_get(gt_genome_node_get_seqid(*gn)),
                                         err);
      gt_assert(changed_seqid); /* is always defined, because an undefined
                                   mapping would be catched earlier */
      if ((feature_node = gt_feature_node_try_cast(*gn))) {
        rval = gt_genome_node_traverse_children(*gn, changed_seqid,
                                                change_sequence_id, true, err);
        gt_assert(!rval); /* change_sequence_id() is sane */
      }
      else
        gt_genome_node_change_seqid(*gn, changed_seqid);
      gt_str_delete(changed_seqid);
    }
  }

  return had_err;
}
Пример #19
0
static int cluster_annotate_nodes(GtClusteredSet *cs, GtEncseq *encseq,
                                  const char *feature, GtArray *nodes,
                                  GtError *err)
{
  GtFeatureNodeIterator *fni;
  GtFeatureNode *curnode = NULL, *tmp;
  GtClusteredSetIterator *csi = NULL;
  GtGenomeNode *gn;
  GtHashmap *desc2node;
  GtStr *seqid = NULL;
  int had_err = 0;
  unsigned long num_of_clusters, i, elm;
  const char *fnt = NULL;
  char buffer[BUFSIZ], *real_feature;
  gt_error_check(err);

  if ((strcmp(feature, "lLTR") == 0) || (strcmp(feature, "rLTR") == 0))
    real_feature = gt_cstr_dup(gt_ft_long_terminal_repeat);
  else
    real_feature = gt_cstr_dup(feature);

  desc2node = gt_hashmap_new(GT_HASH_STRING, free_hash, NULL);
  for (i = 0; i < gt_array_size(nodes); i++) {
    gn = *(GtGenomeNode**) gt_array_get(nodes, i);
    if (gt_feature_node_try_cast(gn) == NULL)
      continue;
    fni = gt_feature_node_iterator_new((GtFeatureNode*) gn);
    while ((curnode = gt_feature_node_iterator_next(fni)) != NULL) {
      char header[BUFSIZ];
      fnt = gt_feature_node_get_type(curnode);
      if (strcmp(fnt, gt_ft_repeat_region) == 0) {
        const char *rid;
        unsigned long id;
        seqid = gt_genome_node_get_seqid((GtGenomeNode*) curnode);
        rid = gt_feature_node_get_attribute(curnode, "ID");
        (void) sscanf(rid, "repeat_region%lu", &id);
        (void) snprintf(buffer, BUFSIZ, "%s_%lu", gt_str_get(seqid), id);
      } else if (strcmp(fnt, gt_ft_protein_match) == 0) {
        GtRange range;
        const char *attr;
        attr = gt_feature_node_get_attribute(curnode, "name");
        if (!attr)
          continue;
        if (strcmp(feature, attr) != 0)
          continue;
        range = gt_genome_node_get_range((GtGenomeNode*) curnode);
        if ((range.end - range.start + 1) < 10UL)
          continue;
        (void) snprintf(header, BUFSIZ, "%s_%lu_%lu", buffer, range.start,
                        range.end);
        gt_hashmap_add(desc2node, (void*) gt_cstr_dup(header), (void*) curnode);
      } else if (strcmp(fnt, real_feature) == 0) {
        GtRange range;
        range = gt_genome_node_get_range((GtGenomeNode*) curnode);
        if ((range.end - range.start + 1) < 10UL)
          continue;
        (void) snprintf(header, BUFSIZ, "%s_%lu_%lu", buffer, range.start,
                        range.end);
        gt_hashmap_add(desc2node, (void*) gt_cstr_dup(header), (void*) curnode);
      }
    }
    gt_feature_node_iterator_delete(fni);
  }
  gt_free(real_feature);

  num_of_clusters = gt_clustered_set_num_of_clusters(cs, err);
  for (i = 0; i < num_of_clusters; i++) {
    csi = gt_clustered_set_get_iterator(cs, i ,err);
    if (csi != NULL) {
      while (!had_err && (gt_clustered_set_iterator_next(csi, &elm, err)
             != GT_CLUSTERED_SET_ITERATOR_STATUS_END)) {
        char clid[BUFSIZ];
        const char *encseqdesc;
        char *encseqid;
        unsigned long desclen;
        encseqdesc = gt_encseq_description(encseq, &desclen, elm);
        encseqid = gt_calloc((size_t) (desclen + 1), sizeof (char));
        (void) strncpy(encseqid, encseqdesc, (size_t) desclen);
        encseqid[desclen] = '\0';
        tmp = (GtFeatureNode*) gt_hashmap_get(desc2node, (void*) encseqid);
        (void) snprintf(clid, BUFSIZ, "%lu", i);
        gt_feature_node_set_attribute(tmp, "clid", clid);
        gt_free(encseqid);
      }
    }
    gt_clustered_set_iterator_delete(csi, err);
    csi = NULL;
  }
  gt_hashmap_delete(desc2node);
  return had_err;
}