/* Creates a SNP annotator visitor for <gene>, which must be of type gene.
   New references to <gene> and <rmap> are taken. If <trans_table> is NULL the
   standard translation table is created and marked as owned by the visitor,
   otherwise the given table is used and marked as not owned.
   Returns NULL (after deleting the partially built visitor) if no translation
   table is available or if preparing the gene fails; <err> is handed to both
   of these steps. */
GtNodeVisitor* gt_snp_annotator_visitor_new(GtFeatureNode *gene,
                                            GtTransTable *trans_table,
                                            GtRegionMapping *rmap,
                                            GtError *err)
{
  GtNodeVisitor *visitor;
  GtSNPAnnotatorVisitor *annotator;
  gt_assert(gene && gt_feature_node_get_type(gene) == gt_symbol(gt_ft_gene));

  visitor = gt_node_visitor_create(gt_snp_annotator_visitor_class());
  annotator = snp_annotator_visitor_cast(visitor);

  /* referenced inputs */
  annotator->gene = (GtFeatureNode*) gt_genome_node_ref((GtGenomeNode*) gene);
  annotator->rmap = gt_region_mapping_ref(rmap);

  /* interned feature type names used for pointer comparison later on */
  annotator->mRNA_type = gt_symbol(gt_ft_mRNA);
  annotator->CDS_type = gt_symbol(gt_ft_CDS);
  annotator->SNV_type = gt_symbol(gt_ft_SNV);
  annotator->SNP_type = gt_symbol(gt_ft_SNP);

  annotator->rnaseqs = gt_hashmap_new(GT_HASH_DIRECT, NULL, gt_free_func);

  /* translation table: use the caller's, or create the standard one */
  annotator->tt = trans_table ? trans_table
                              : gt_trans_table_new_standard(err);
  annotator->own_tt = (trans_table == NULL);

  if (annotator->tt != NULL
        && gt_snp_annotator_visitor_prepare_gene(annotator, err) == 0) {
    return visitor;
  }

  gt_node_visitor_delete(visitor);
  return NULL;
}
Ejemplo n.º 2
0
/* Test routine: NUMBER_OF_SYMBOLS times, builds the decimal representation of
   a random number (drawn with gt_rand_max(MAX_SYMBOL)), interns it with
   gt_symbol() and asserts that interning it again yields a string with the
   same content. <data> is ignored; always returns NULL. */
static void* test_symbol(GT_UNUSED void *data)
{
  GtUword round = 0;
  GtStr *candidate = gt_str_new();
  while (round < NUMBER_OF_SYMBOLS) {
    const char *text;
    gt_str_reset(candidate);
    gt_str_append_ulong(candidate, gt_rand_max(MAX_SYMBOL));
    text = gt_str_get(candidate);
    /* first call interns the string, second call must find it again */
    gt_symbol(text);
    gt_assert(!strcmp(gt_symbol(text), text));
    round++;
  }
  gt_str_delete(candidate);
  return NULL;
}
Ejemplo n.º 3
0
/* Creates a new feature node located on <seqid> with the given <type>, the
   range <start>..<end> (<start> must not exceed <end>) and <strand>.
   A new reference to <seqid> is taken and <type> is interned via gt_symbol().
   Source, attributes, children, observer and representative start out as
   NULL, the score as GT_UNDEF_FLOAT and the phase as GT_PHASE_UNDEFINED.
   Returns the node as a GtGenomeNode. */
GtGenomeNode* gt_feature_node_new(GtStr *seqid, const char *type,
                                  GtUword start, GtUword end,
                                  GtStrand strand)
{
  GtGenomeNode *gn;
  GtFeatureNode *fn;
  gt_assert(seqid && type);
  gt_assert(start <= end);
  gn = gt_genome_node_create(gt_feature_node_class());
  fn = gt_feature_node_cast(gn);
  fn->seqid       = gt_str_ref(seqid);
  fn->source      = NULL;
  fn->type        = gt_symbol(type);
  fn->score       = GT_UNDEF_FLOAT;
  fn->range.start = start;
  fn->range.end   = end;
  fn->representative = NULL;
  fn->attributes  = NULL;
  /* the strand is packed into the bit field at STRAND_OFFSET */
  fn->bit_field   = 0;
  fn->bit_field  |= strand << STRAND_OFFSET;
  fn->children    = NULL; /* the children list is created on demand */
  fn->observer    = NULL;
  gt_feature_node_set_phase(fn, GT_PHASE_UNDEFINED);
  set_transcriptfeaturetype(fn, TRANSCRIPT_FEATURE_TYPE_UNDETERMINED);
  set_tree_status(&fn->bit_field, IS_TREE);
  /* the DFS status is set to DFS_WHITE already */
  return gn;
}
Ejemplo n.º 4
0
/* Adds the term described by <stanza> to <type_graph>, which must not be
   marked ready yet. The stanza must carry exactly one "id" and one "name"
   entry and its id must not be present in the graph already.
   A new type node is created and registered in the name2id, id2name and
   nodemap tables as well as in the node array. All "is_a" entries and all
   "relationship" entries starting with PART_OF, MEMBER_OF or INTEGRAL_PART_OF
   are stored in the node; the latter three are all recorded as part_of
   relations. Only the first token (up to a space or newline) of each target
   is used. */
void gt_type_graph_add_stanza(GtTypeGraph *type_graph,
                              const GtOBOStanza *stanza)
{
  /* relationship keywords that are treated as part_of, in matching order */
  const char *relations[] = { PART_OF, MEMBER_OF, INTEGRAL_PART_OF };
  const char *id_value, *name_value;
  GtUword i, k, size;
  GtTypeNode *node;
  GtStr *buf;
  gt_assert(type_graph && stanza && !type_graph->ready);
  gt_assert(gt_obo_stanza_size(stanza, "id") == 1);
  gt_assert(gt_obo_stanza_size(stanza, "name") == 1);
  id_value = gt_symbol(gt_obo_stanza_get_value(stanza, "id", 0));
  name_value = gt_symbol(gt_obo_stanza_get_value(stanza, "name", 0));
  gt_assert(id_value);
  gt_assert(name_value);
  gt_assert(!gt_hashmap_get(type_graph->nodemap, id_value));
  node = gt_type_node_new(gt_array_size(type_graph->nodes), id_value);
  gt_hashmap_add(type_graph->name2id, (char*) name_value, (char*) id_value);
  gt_hashmap_add(type_graph->id2name, (char*) id_value, (char*) name_value);
  gt_hashmap_add(type_graph->nodemap, (char*) id_value, node);
  gt_array_add(type_graph->nodes, node);
  buf = gt_str_new();

  /* store is_a entries in node, if necessary */
  size = gt_obo_stanza_size(stanza, "is_a");
  for (i = 0; i < size; i++) {
    const char *id = gt_obo_stanza_get_value(stanza, "is_a", i);
    gt_str_reset(buf);
    gt_str_append_cstr_nt(buf, id, strcspn(id, " \n"));
    gt_type_node_is_a_add(node, gt_symbol(gt_str_get(buf)));
  }

  /* store part_of-like entries in node, if necessary */
  size = gt_obo_stanza_size(stanza, "relationship");
  for (i = 0; i < size; i++) {
    const char *rel = gt_obo_stanza_get_value(stanza, "relationship", i);
    gt_str_reset(buf);
    for (k = 0; k < sizeof relations / sizeof relations[0]; k++) {
      GtUword keyword_len = strlen(relations[k]);
      if (!strncmp(rel, relations[k], keyword_len)) {
        const char *target = rel + keyword_len;
        /* skip the separator following the keyword, but never step over the
           terminating NUL if the value consists of the keyword only */
        if (*target != '\0')
          target++;
        gt_str_append_cstr_nt(buf, target, strcspn(target, " \n"));
        gt_type_node_part_of_add(node, gt_symbol(gt_str_get(buf)));
        break; /* at most one keyword is applied per relationship entry */
      }
    }
  }
  gt_str_delete(buf);
}
Ejemplo n.º 5
0
/* Checks that the script behind <script_filter> provides all required
   metadata ('name', 'description', 'short_descr', 'author', 'email',
   'version') and defines a global 'filter'.
   Returns true if everything is present. Returns false and sets <err> to a
   "... not found" / "... is not defined" message for the first missing item.
   Also returns false if a metadata getter returns NULL; the getters are
   presumably thin wrappers around gt_script_filter_get_string(), which returns
   NULL only after setting <err> itself, so <err> is left as the getter set
   it.
   NOTE(review): on success the value of 'filter' is left on the Lua stack,
   exactly as before -- confirm whether callers expect it to be popped. */
bool gt_script_filter_validate(GtScriptFilter *script_filter, GtError *err)
{
  const char *result,
             *undefined;
  gt_assert(script_filter);
  gt_error_check(err);

  /* interned marker the getters hand back for a missing metadata entry;
     compared by pointer */
  undefined = gt_symbol("undefined");

  result = gt_script_filter_get_name(script_filter, err);
  if (!result)
    return false;
  if (result == undefined) {
    gt_error_set(err, "metadata 'name' not found");
    return false;
  }
  result = gt_script_filter_get_description(script_filter, err);
  if (!result)
    return false;
  if (result == undefined) {
    gt_error_set(err, "metadata 'description' not found");
    return false;
  }
  result = gt_script_filter_get_short_description(script_filter, err);
  if (!result)
    return false;
  if (result == undefined) {
    gt_error_set(err, "metadata 'short_descr' not found");
    return false;
  }
  result = gt_script_filter_get_author(script_filter, err);
  if (!result)
    return false;
  if (result == undefined) {
    gt_error_set(err, "metadata 'author' not found");
    return false;
  }
  result = gt_script_filter_get_email(script_filter, err);
  if (!result)
    return false;
  if (result == undefined) {
    gt_error_set(err, "metadata 'email' not found");
    return false;
  }
  result = gt_script_filter_get_version(script_filter, err);
  if (!result)
    return false;
  if (result == undefined) {
    gt_error_set(err, "metadata 'version' not found");
    return false;
  }

  lua_getglobal(script_filter->L, "filter");
  if (lua_isnil(script_filter->L, -1)) {
    gt_error_set(err, "function 'filter' is not defined");
    lua_pop(script_filter->L, 1);
    return false;
  }
  return true;
}
Ejemplo n.º 6
0
/* TODO: caching */
/* Retrieves the Lua global <name> of <script_filter> as a C string.
   - If the global is nil, the interned string "undefined" is returned.
   - If the global is a function, it is called without arguments and its
     single result is used; if the call fails, <err> is set to the Lua error
     message and NULL is returned.
   - If the (resulting) value is not a string, <err> is set and NULL is
     returned.
   NOTE(review): on success the value stays on the Lua stack and the returned
   pointer refers to it -- confirm who is expected to pop it. */
static const char *gt_script_filter_get_string(GtScriptFilter *script_filter,
                                              const char *name, GtError *err)
{
#ifndef NDEBUG
  int stack_size;
#endif
  gt_assert(script_filter && name);
  gt_error_check(err);

#ifndef NDEBUG
  stack_size = lua_gettop(script_filter->L);
#endif

  lua_getglobal(script_filter->L, name);

  /* missing global: hand back the "undefined" marker */
  if (lua_isnil(script_filter->L, -1)) {
    lua_pop(script_filter->L, 1);
    return gt_symbol("undefined");
  }

  /* a function is executed (no arguments, one result) to obtain the value */
  if (lua_isfunction(script_filter->L, -1)
        && lua_pcall(script_filter->L, 0, 1, 0) != 0) {
    gt_error_set(err, "%s", lua_tostring(script_filter->L, -1));
    lua_pop(script_filter->L, 1);
    gt_assert(lua_gettop(script_filter->L) == stack_size);
    return NULL;
  }

  /* retrieve string */
  if (!lua_isnil(script_filter->L, -1)
        && lua_isstring(script_filter->L, -1)) {
    return lua_tostring(script_filter->L, -1);
  }

  /* anything else is rejected */
  lua_pop(script_filter->L, 1);
  gt_assert(lua_gettop(script_filter->L) == stack_size);
  gt_error_set(err, "script filter '%s': '%s' must return a string",
               gt_str_get(script_filter->filename), name);
  return NULL;
}
Ejemplo n.º 7
0
/* Creates a visitor that extracts features of the given <type> (interned via
   gt_symbol()). The options <join>, <translate>, <seqid>, <target> and
   <width> as well as the region mapping <rm> (must not be NULL) and the
   output file <outfp> are stored in the visitor as given; the FASTA sequence
   counter starts at 0. */
GtNodeVisitor* gt_extract_feature_visitor_new(GtRegionMapping *rm,
                                              const char *type, bool join,
                                              bool translate, bool seqid,
                                              bool target, GtUword width,
                                              GtFile *outfp)
{
  GtNodeVisitor *visitor;
  GtExtractFeatureVisitor *extractor;
  gt_assert(rm);

  visitor = gt_node_visitor_create(gt_extract_feature_visitor_class());
  extractor = gt_extract_feature_visitor_cast(visitor);

  /* what to extract */
  extractor->type = gt_symbol(type);
  extractor->region_mapping = rm;

  /* how to extract and format it */
  extractor->join = join;
  extractor->translate = translate;
  extractor->seqid = seqid;
  extractor->target = target;
  extractor->width = width;

  /* where to write it */
  extractor->outfp = outfp;
  extractor->fastaseq_counter = 0;

  return visitor;
}
/* Classifies the effect of replacing the base at position <variant_pos> of
   the sequence stored for <mRNA> in sav->rnaseqs by <variant_char>.
   The affected codon and its variant are compared using the translation
   table sav->tt and the effect is one of: synonymous stop, stop lost,
   nonsense, synonymous amino acid or missense. The effect, followed by
   <variant_idx>, the type of <mRNA> and its ID attribute (space separated),
   is appended (comma separated) to the GT_GVF_VARIANT_EFFECT attribute of
   <snp>, or stored as a new attribute if none exists yet.
   In debug builds <reference_char> is checked case-insensitively against the
   base found in the sequence.
   Returns 0 on success or the error code of the codon translation, in which
   case the attribute is left untouched.
   NOTE(review): only <variant_pos> < sequence length is asserted; if the
   sequence length is not a multiple of GT_CODON_LENGTH, a variant in the
   truncated last codon makes the codon read run to or past the terminating
   NUL -- confirm that callers only store complete coding sequences. */
static int snp_annotator_classify_snp(GtSNPAnnotatorVisitor *sav,
                                      GtFeatureNode *mRNA,
                                      GtFeatureNode *snp,
                                      GtUword variant_pos,
                                      GtUword variant_idx,
                                      char variant_char,
#ifndef NDEBUG
                                      GT_UNUSED char reference_char,
#endif
                                      GT_UNUSED GtError *err)
{
  int had_err = 0;
  char *mrnaseq;
  const char *variant_effect = NULL;
  gt_assert(mRNA && snp && sav);
  gt_log_log("processing variant char %c for SNP %s\n",
               variant_char, gt_feature_node_get_attribute(snp, "Dbxref"));
  mrnaseq = gt_hashmap_get(sav->rnaseqs, mRNA);
  gt_assert(mrnaseq);
  if (mrnaseq) {
    char codon[3],
         variant_codon[3];
    GtStr *effect_string;
    char oldamino,
         newamino;
    GT_UNUSED GtUword mrnalen;
    /* first base of the codon containing the variant and the variant's
       offset within that codon */
    GtUword codon_start = GT_CODON_LENGTH * (variant_pos / GT_CODON_LENGTH),
            variantoffset = variant_pos % GT_CODON_LENGTH;
    mrnalen = strlen(mrnaseq);
    gt_assert(variant_pos < mrnalen);
    variant_codon[0] = codon[0] = mrnaseq[codon_start];
    variant_codon[1] = codon[1] = mrnaseq[codon_start+1];
    variant_codon[2] = codon[2] = mrnaseq[codon_start+2];
    variant_codon[variantoffset] = variant_char;
#ifndef NDEBUG
    /* toupper() requires a value representable as unsigned char */
    gt_assert(toupper((unsigned char) codon[variantoffset])
                == toupper((unsigned char) reference_char));
#endif
    if (gt_trans_table_is_stop_codon(sav->tt, codon[0], codon[1], codon[2])) {
      /* reference codon is a stop codon */
      if (gt_trans_table_is_stop_codon(sav->tt, variant_codon[0],
                                       variant_codon[1], variant_codon[2])) {
        variant_effect = gt_symbol(GT_SNP_SYNONYMOUS_STOP_EFFECT);
      } else {
        variant_effect = gt_symbol(GT_SNP_STOP_LOST_EFFECT);
      }
    } else {
      /* reference codon codes for an amino acid */
      if (gt_trans_table_is_stop_codon(sav->tt, variant_codon[0],
                                       variant_codon[1], variant_codon[2])) {
        variant_effect = gt_symbol(GT_SNP_NONSENSE_EFFECT);
      } else {
        had_err = gt_trans_table_translate_codon(sav->tt, codon[0], codon[1],
                                                 codon[2], &oldamino, err);
        if (!had_err) {
          had_err = gt_trans_table_translate_codon(sav->tt, variant_codon[0],
                                                   variant_codon[1],
                                                   variant_codon[2],
                                                   &newamino, err);
        }
        if (!had_err) {
          if (newamino == oldamino) {
            variant_effect = gt_symbol(GT_SNP_SYNONYMOUS_AMINO_EFFECT);
          } else {
            variant_effect = gt_symbol(GT_SNP_MISSENSE_EFFECT);
          }
        }
      }
    }
    if (!had_err) {
      const char *var_attrib;
      gt_assert(variant_effect != NULL);
      /* extend an existing effect list or start a new one */
      if ((var_attrib = gt_feature_node_get_attribute(snp,
                                                      GT_GVF_VARIANT_EFFECT))) {
        effect_string = gt_str_new_cstr(var_attrib);
        gt_str_append_cstr(effect_string, ",");
        gt_str_append_cstr(effect_string, variant_effect);
      } else {
        effect_string = gt_str_new_cstr(variant_effect);
      }
      gt_str_append_cstr(effect_string, " ");
      gt_str_append_ulong(effect_string, variant_idx);
      gt_str_append_cstr(effect_string, " ");
      gt_str_append_cstr(effect_string, gt_feature_node_get_type(mRNA));
      gt_str_append_cstr(effect_string, " ");
      gt_str_append_cstr(effect_string,
                         gt_feature_node_get_attribute(mRNA, GT_GFF_ID));
      gt_feature_node_set_attribute(snp, GT_GVF_VARIANT_EFFECT,
                                    gt_str_get(effect_string));
      gt_str_delete(effect_string);
    }
  }

  return had_err;
}
/* Fetches the next node from the input stream of <ns> into <gn>. Nodes that
   are not feature nodes are passed on untouched. For feature nodes, the
   annotation subgraph is walked with the visitor ls->lv to fill ls->element.
   If that yields a main node, one line is written to the tabular output file
   (element and LTR coordinates, TSDs, PPT, PBS, protein domain order) and the
   PPT, PBS, protein domain, 5'/3' LTR and complete element sequences are
   written to their FASTA output files.
   The per-element data in ls->element (pdoms, pdomorder, seqid) is released
   before returning. Returns 0 on success, the error code of the failing step
   otherwise. */
int gt_ltrfileout_stream_next(GtNodeStream *ns, GtGenomeNode **gn, GtError *err)
{
  GtLTRdigestFileOutStream *ls;
  GtFeatureNode *fn;
  GtRange lltr_rng = {GT_UNDEF_UWORD, GT_UNDEF_UWORD},
          rltr_rng = {GT_UNDEF_UWORD, GT_UNDEF_UWORD},
          ppt_rng = {GT_UNDEF_UWORD, GT_UNDEF_UWORD},
          pbs_rng = {GT_UNDEF_UWORD, GT_UNDEF_UWORD};
  int had_err;
  GtUword i=0;

  gt_error_check(err);
  ls = gt_ltrdigest_file_out_stream_cast(ns);

  /* initialize this element */
  memset(&ls->element, 0, sizeof (GtLTRElement));

  /* get annotations from parser */
  had_err = gt_node_stream_next(ls->in_stream, gn, err);
  if (!had_err && *gn)
  {
    GtFeatureNodeIterator* gni;
    GtFeatureNode *mygn;

    /* only process feature nodes */
    if (!(fn = gt_feature_node_try_cast(*gn)))
      return 0;

    ls->element.pdomorder = gt_array_new(sizeof (const char*));

    /* fill LTRElement structure from GFF3 subgraph */
    gni = gt_feature_node_iterator_new(fn);
    for (mygn = fn; mygn != NULL; mygn = gt_feature_node_iterator_next(gni))
      (void) gt_genome_node_accept((GtGenomeNode*) mygn,
                                   (GtNodeVisitor*) ls->lv,
                                   err);
    gt_feature_node_iterator_delete(gni);
  }

  if (!had_err && ls->element.mainnode != NULL)
  {
    char desc[GT_MAXFASTAHEADER];
    GtFeatureNode *ltr3, *ltr5;
    GtStr *sdesc, *sreg, *seq;

    /* find sequence in GtEncseq */
    sreg = gt_genome_node_get_seqid((GtGenomeNode*) ls->element.mainnode);

    sdesc = gt_str_new();
    had_err = gt_region_mapping_get_description(ls->rmap, sdesc, sreg, err);

    if (!had_err) {
      GtRange rng;
      /* copy at most seqnamelen characters of the description and replace
         blanks by underscores */
      ls->element.seqid = gt_calloc((size_t) ls->seqnamelen+1, sizeof (char));
      (void) snprintf(ls->element.seqid,
                      MIN((size_t) gt_str_length(sdesc),
                          (size_t) ls->seqnamelen)+1,
                      "%s", gt_str_get(sdesc));
      gt_cstr_rep(ls->element.seqid, ' ', '_');
      if (gt_str_length(sdesc) > (GtUword) ls->seqnamelen)
        ls->element.seqid[ls->seqnamelen] = '\0';

      (void) gt_ltrelement_format_description(&ls->element,
                                              ls->seqnamelen,
                                              desc,
                                              (size_t) (GT_MAXFASTAHEADER-1));

      /* output basic retrotransposon data */
      lltr_rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.leftLTR);
      rltr_rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.rightLTR);
      rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.mainnode);
      gt_file_xprintf(ls->tabout_file,
                      GT_WU"\t"GT_WU"\t"GT_WU"\t%s\t"GT_WU"\t"GT_WU"\t"GT_WU"\t"
                      GT_WU"\t"GT_WU"\t"GT_WU"\t",
                      rng.start, rng.end, gt_ltrelement_length(&ls->element),
                      ls->element.seqid, lltr_rng.start, lltr_rng.end,
                      gt_ltrelement_leftltrlen(&ls->element), rltr_rng.start,
                      rltr_rng.end, gt_ltrelement_rightltrlen(&ls->element));
    }
    /* the description string is no longer needed; release it on the error
       path as well */
    gt_str_delete(sdesc);

    seq = gt_str_new();

    /* output TSDs */
    if (!had_err && ls->element.leftTSD != NULL)
    {
      GtRange tsd_rng;
      tsd_rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.leftTSD);
      had_err = gt_extract_feature_sequence(seq,
                                       (GtGenomeNode*) ls->element.leftTSD,
                                       gt_symbol(gt_ft_target_site_duplication),
                                       false,
                                       NULL, NULL, ls->rmap, err);
      if (!had_err) {
        gt_file_xprintf(ls->tabout_file,
                         ""GT_WU"\t"GT_WU"\t%s\t",
                         tsd_rng.start,
                         tsd_rng.end,
                         gt_str_get(seq));
      }
      gt_str_reset(seq);
    } else gt_file_xprintf(ls->tabout_file, "\t\t\t");

    if (!had_err && ls->element.rightTSD != NULL)
    {
      GtRange tsd_rng;

      tsd_rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.rightTSD);
      had_err = gt_extract_feature_sequence(seq,
                                       (GtGenomeNode*) ls->element.rightTSD,
                                       gt_symbol(gt_ft_target_site_duplication),
                                       false,
                                       NULL, NULL, ls->rmap, err);
      if (!had_err) {
        gt_file_xprintf(ls->tabout_file,
                           ""GT_WU"\t"GT_WU"\t%s\t",
                           tsd_rng.start,
                           tsd_rng.end,
                           gt_str_get(seq));
      }
      gt_str_reset(seq);
    } else gt_file_xprintf(ls->tabout_file, "\t\t\t");

    /* output PPT */
    if (!had_err && ls->element.ppt != NULL)
    {
      GtStrand ppt_strand = gt_feature_node_get_strand(ls->element.ppt);

      ppt_rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.ppt);
      had_err = gt_extract_feature_sequence(seq,
                                            (GtGenomeNode*) ls->element.ppt,
                                            gt_symbol(gt_ft_RR_tract), false,
                                            NULL, NULL, ls->rmap, err);
      if (!had_err) {
        gt_fasta_show_entry(desc, gt_str_get(seq), gt_range_length(&ppt_rng),
                            GT_FSWIDTH, ls->pptout_file);
        /* the last column is the distance between the PPT and the adjacent
           LTR boundary, which depends on the PPT strand */
        gt_file_xprintf(ls->tabout_file,
                           ""GT_WU"\t"GT_WU"\t%s\t%c\t%d\t",
                           ppt_rng.start,
                           ppt_rng.end,
                           gt_str_get(seq),
                           GT_STRAND_CHARS[ppt_strand],
                           (ppt_strand == GT_STRAND_FORWARD ?
                               abs((int) (rltr_rng.start - ppt_rng.end)) :
                               abs((int) (lltr_rng.end - ppt_rng.start))));
      }
      gt_str_reset(seq);
    } else gt_file_xprintf(ls->tabout_file, "\t\t\t\t\t");

    /* output PBS */
    if (!had_err && ls->element.pbs != NULL)
    {
      GtStrand pbs_strand;

      pbs_strand = gt_feature_node_get_strand(ls->element.pbs);
      pbs_rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.pbs);
      had_err = gt_extract_feature_sequence(seq,
                                           (GtGenomeNode*) ls->element.pbs,
                                           gt_symbol(gt_ft_primer_binding_site),
                                           false, NULL, NULL, ls->rmap, err);
      if (!had_err) {
        gt_fasta_show_entry(desc, gt_str_get(seq), gt_range_length(&pbs_rng),
                            GT_FSWIDTH, ls->pbsout_file);
        gt_file_xprintf(ls->tabout_file,
                         ""GT_WU"\t"GT_WU"\t%c\t%s\t%s\t%s\t%s\t%s\t",
                         pbs_rng.start,
                         pbs_rng.end,
                         GT_STRAND_CHARS[pbs_strand],
                         gt_feature_node_get_attribute(ls->element.pbs, "trna"),
                         gt_str_get(seq),
                         gt_feature_node_get_attribute(ls->element.pbs,
                                                       "pbsoffset"),
                         gt_feature_node_get_attribute(ls->element.pbs,
                                                       "trnaoffset"),
                         gt_feature_node_get_attribute(ls->element.pbs,
                                                       "edist"));
      }
      gt_str_reset(seq);
    } else gt_file_xprintf(ls->tabout_file, "\t\t\t\t\t\t\t\t");

    /* output protein domains */
    if (!had_err && ls->element.pdoms != NULL)
    {
      GtStr *pdomorderstr = gt_str_new();
      for (i=0; !had_err && i<gt_array_size(ls->element.pdomorder); i++)
      {
        const char* key = *(const char**) gt_array_get(ls->element.pdomorder,
                                                       i);
        GtArray *entry = (GtArray*) gt_hashmap_get(ls->element.pdoms, key);
        had_err = write_pdom(ls, entry, key, ls->rmap, desc, err);
      }

      /* domain order is reported relative to the element's strand */
      if (GT_STRAND_REVERSE == gt_feature_node_get_strand(ls->element.mainnode))
        gt_array_reverse(ls->element.pdomorder);

      for (i=0 ;!had_err && i<gt_array_size(ls->element.pdomorder); i++)
      {
        const char* name = *(const char**) gt_array_get(ls->element.pdomorder,
                                                        i);
        gt_str_append_cstr(pdomorderstr, name);
        if (i != gt_array_size(ls->element.pdomorder)-1)
          gt_str_append_cstr(pdomorderstr, "/");
      }
      gt_file_xprintf(ls->tabout_file, "%s", gt_str_get(pdomorderstr));
      gt_str_delete(pdomorderstr);
    }

    /* output LTRs (we just expect them to exist) */
    switch (gt_feature_node_get_strand(ls->element.mainnode))
    {
      case GT_STRAND_REVERSE:
        ltr5 = ls->element.rightLTR;
        ltr3 = ls->element.leftLTR;
        break;
      case GT_STRAND_FORWARD:
      default:
        ltr5 = ls->element.leftLTR;
        ltr3 = ls->element.rightLTR;
        break;
    }

    if (!had_err) {
      had_err = gt_extract_feature_sequence(seq, (GtGenomeNode*) ltr5,
                                          gt_symbol(gt_ft_long_terminal_repeat),
                                          false,
                                          NULL, NULL, ls->rmap, err);
    }
    if (!had_err) {
      gt_fasta_show_entry(desc, gt_str_get(seq), gt_str_length(seq),
                          GT_FSWIDTH, ls->ltr5out_file);
      gt_str_reset(seq);
    }
    if (!had_err) {
      had_err = gt_extract_feature_sequence(seq, (GtGenomeNode*) ltr3,
                                          gt_symbol(gt_ft_long_terminal_repeat),
                                          false,
                                          NULL, NULL, ls->rmap, err);
    }
    if (!had_err) {
      gt_fasta_show_entry(desc, gt_str_get(seq), gt_str_length(seq),
                          GT_FSWIDTH, ls->ltr3out_file);
      gt_str_reset(seq);
    }

    /* output complete oriented element */
    if (!had_err) {
      had_err = gt_extract_feature_sequence(seq,
                                           (GtGenomeNode*) ls->element.mainnode,
                                           gt_symbol(gt_ft_LTR_retrotransposon),
                                           false,
                                           NULL, NULL, ls->rmap, err);
    }
    if (!had_err) {
      gt_fasta_show_entry(desc,gt_str_get(seq), gt_str_length(seq),
                          GT_FSWIDTH, ls->elemout_file);
      gt_str_reset(seq);
    }
    gt_file_xprintf(ls->tabout_file, "\n");
    gt_str_delete(seq);
  }
  /* release per-element data collected for this node */
  gt_hashmap_delete(ls->element.pdoms);
  gt_array_delete(ls->element.pdomorder);
  gt_free(ls->element.seqid);
  return had_err;
}
/* Writes the hits <pdoms> of the protein domain <pdomname> for the candidate
   described by <desc>.
   The per-domain output files (nucleotide FASTA, and -- if enabled in <ls> --
   alignment and amino acid FASTA) are looked up in the file tables of <ls>
   and opened and registered on first use. The hits are expected to be sorted;
   if the first hit lies on the reverse strand their order is reversed. The
   nucleotide sequences of all hits are concatenated into one FASTA entry, as
   are the amino acid sequences; alignments are written one per hit. The
   "pdom_alignment" and "pdom_aaseq" user data of each processed hit are
   released.
   Returns 0 on success and -1 if a sequence could not be extracted, in which
   case no FASTA entry is written. */
static int write_pdom(GtLTRdigestFileOutStream *ls, GtArray *pdoms,
                      const char *pdomname, GT_UNUSED GtRegionMapping *rmap,
                      char *desc, GtError *err)
{
  int had_err = 0;
  char fname[GT_MAXFILENAMELEN];
  GtFile *dna_out = NULL,
         *ali_out = NULL,
         *aa_out = NULL;
  GtUword k = 0,
          dna_length = 0;
  GtStr *dna_concat,
        *aa_concat;
  gt_error_check(err);

  dna_concat = gt_str_new();
  aa_concat = gt_str_new();

  /* nucleotide FASTA file of this domain, opened on first use */
  dna_out = (GtFile*) gt_hashmap_get(ls->pdomout_files, pdomname);
  if (dna_out == NULL) {
    (void) snprintf(fname, (size_t) (GT_MAXFILENAMELEN-1), "%s_pdom_%s.fas",
                    ls->fileprefix, pdomname);
    dna_out = gt_file_xopen(fname, "w+");
    gt_hashmap_add(ls->pdomout_files, gt_cstr_dup(pdomname), dna_out);
  }

  /* alignment file of this domain, opened on first use */
  if (ls->write_pdom_alignments) {
    ali_out = (GtFile*) gt_hashmap_get(ls->pdomali_files, pdomname);
    if (ali_out == NULL) {
      (void) snprintf(fname, (size_t) (GT_MAXFILENAMELEN-1), "%s_pdom_%s.ali",
                      ls->fileprefix, pdomname);
      ali_out = gt_file_xopen(fname, "w+");
      gt_hashmap_add(ls->pdomali_files, gt_cstr_dup(pdomname), ali_out);
    }
  }

  /* amino acid FASTA file of this domain, opened on first use */
  if (ls->write_pdom_aaseqs) {
    aa_out = (GtFile*) gt_hashmap_get(ls->pdomaa_files, pdomname);
    if (aa_out == NULL) {
      (void) snprintf(fname, (size_t) (GT_MAXFILENAMELEN-1),
                      "%s_pdom_%s_aa.fas",
                      ls->fileprefix, pdomname);
      aa_out = gt_file_xopen(fname, "w+");
      gt_hashmap_add(ls->pdomaa_files, gt_cstr_dup(pdomname), aa_out);
    }
  }

  /* hits must be sorted; process reverse strand hits in reverse order */
  if (gt_array_size(pdoms) > 1UL) {
    for (k = 1UL; k < gt_array_size(pdoms); k++) {
      gt_assert(gt_genome_node_cmp(*(GtGenomeNode**)gt_array_get(pdoms, k),
                                *(GtGenomeNode**)gt_array_get(pdoms, k-1))
                >= 0);
    }
    if (gt_feature_node_get_strand(*(GtFeatureNode**) gt_array_get(pdoms, 0UL))
        == GT_STRAND_REVERSE) {
      gt_array_reverse(pdoms);
    }
  }

  /* output protein domain data */
  for (k = 0; k < gt_array_size(pdoms); k++) {
    GtFeatureNode *hit;
    GtStr *hit_ali,
          *hit_aaseq;
    GtRange hit_rng;

    hit = *(GtFeatureNode**) gt_array_get(pdoms, k);
    hit_ali = gt_genome_node_get_user_data((GtGenomeNode*) hit,
                                           "pdom_alignment");
    hit_aaseq = gt_genome_node_get_user_data((GtGenomeNode*) hit,
                                             "pdom_aaseq");
    hit_rng = gt_genome_node_get_range((GtGenomeNode*) hit);

    /* the sequence of this hit is added to the concatenated sequence */
    if (gt_extract_feature_sequence(dna_concat, (GtGenomeNode*) hit,
                                    gt_symbol(gt_ft_protein_match), false,
                                    NULL, NULL, rmap, err)) {
      had_err = -1;
      break;
    }

    if (ls->write_pdom_alignments && hit_ali) {
      char header[BUFSIZ];

      /* write away alignment */
      (void) snprintf(header, BUFSIZ-1,
                      "Protein domain alignment in translated "
                      "sequence for candidate\n'%s':\n\n",
                      desc);
      gt_file_xwrite(ali_out, header,
                     (size_t) strlen(header) * sizeof (char));
      gt_file_xwrite(ali_out, gt_str_get(hit_ali),
                     (size_t) gt_str_length(hit_ali) * sizeof (char));
      gt_file_xwrite(ali_out, "---\n\n", 5 * sizeof (char));
    }

    /* append amino acid sequence */
    if (ls->write_pdom_aaseqs && hit_aaseq)
      gt_str_append_str(aa_concat, hit_aaseq);

    gt_genome_node_release_user_data((GtGenomeNode*) hit, "pdom_alignment");
    gt_genome_node_release_user_data((GtGenomeNode*) hit, "pdom_aaseq");
    dna_length += gt_range_length(&hit_rng);
  }

  if (!had_err) {
    gt_fasta_show_entry(desc, gt_str_get(dna_concat), dna_length, GT_FSWIDTH,
                        dna_out);
    if (ls->write_pdom_aaseqs) {
      gt_fasta_show_entry(desc, gt_str_get(aa_concat),
                          gt_str_length(aa_concat), GT_FSWIDTH, aa_out);
    }
  }

  gt_str_delete(dna_concat);
  gt_str_delete(aa_concat);
  return had_err;
}
Ejemplo n.º 11
0
/* Creates a protein domain visitor that runs the external 'hmmscan' program
   with the models in <model>.
   First 'hmmscan -h' is run via system() to check that the executable is
   usable; if that cannot be executed, does not terminate normally or exits
   with a non-zero status, <err> is set and NULL is returned. On Windows the
   function always sets <err> and returns NULL.
   Otherwise the visitor is set up with <eval_cutoff>, <cutoff>,
   <chain_max_gap_length> and <rmap> (stored as given), and the hmmscan
   command line is assembled: '--cpu <gt_jobs>', the cutoff option selected by
   <cutoff> ('--cut_ga', '--cut_tc' or '--domE <eval_cutoff>'), the model file
   name and '-'. The command line is also stored split at blanks. */
GtNodeVisitor* gt_ltrdigest_pdom_visitor_new(GtPdomModelSet *model,
                                             double eval_cutoff,
                                             unsigned int chain_max_gap_length,
                                             GtPdomCutoff cutoff,
                                             GtRegionMapping *rmap,
                                             GtError *err)
{
  GtNodeVisitor *nv;
  GtLTRdigestPdomVisitor *lv;
  GtStr *cmd;
  int i, rval;
  gt_assert(model && rmap);

  rval = system("hmmscan -h > /dev/null");
  if (rval == -1) {
    gt_error_set(err, "error executing system(hmmscan)");
    return NULL;
  }
#ifndef _WIN32
  /* the exit status is only meaningful if the child terminated normally */
  if (!WIFEXITED(rval) || WEXITSTATUS(rval) != 0) {
    gt_error_set(err, "cannot find the hmmscan executable in PATH");
    return NULL;
  }
#else
  /* XXX */
  gt_error_set(err, "hmmscan for Windows not implemented");
  return NULL;
#endif

  nv = gt_node_visitor_create(gt_ltrdigest_pdom_visitor_class());
  lv = gt_ltrdigest_pdom_visitor_cast(nv);
  lv->eval_cutoff = eval_cutoff;
  lv->cutoff = cutoff;
  lv->chain_max_gap_length = chain_max_gap_length;
  lv->rmap = rmap;
  lv->output_all_chains = false;
  lv->tag = gt_str_new_cstr("GenomeTools");
  lv->root_type = gt_symbol(gt_ft_LTR_retrotransposon);

  /* one buffer per reading frame and direction */
  for (i = 0; i < 3; i++) {
    lv->fwd[i] = gt_str_new();
    lv->rev[i] = gt_str_new();
  }

  /* assemble the hmmscan command line */
  cmd = gt_str_new_cstr("hmmscan --cpu ");
  gt_str_append_uint(cmd, gt_jobs);
  gt_str_append_cstr(cmd, " ");
  switch (cutoff) {
    case GT_PHMM_CUTOFF_GA:
      gt_str_append_cstr(cmd, "--cut_ga");
      break;
    case GT_PHMM_CUTOFF_TC:
      gt_str_append_cstr(cmd, "--cut_tc");
      break;
    case GT_PHMM_CUTOFF_NONE:
      gt_str_append_cstr(cmd, "--domE ");
      gt_str_append_double(cmd, eval_cutoff, 50);
      break;
    default:
      /* any other value adds no cutoff option, as before */
      break;
  }
  gt_str_append_cstr(cmd, " ");
  gt_str_append_cstr(cmd, gt_pdom_model_set_get_filename(model));
  gt_str_append_cstr(cmd, " -");
  lv->cmdline = cmd;
  lv->args = gt_cstr_split(gt_str_get(lv->cmdline), ' ');
  gt_log_log("HMMER cmdline: %s", gt_str_get(cmd));

  return nv;
}
Ejemplo n.º 12
0
/* Sets the root feature type of <lv> to <type>. The type name is interned
   with gt_symbol() and the interned pointer is stored. Neither argument may
   be NULL. */
void gt_ltrdigest_pdom_visitor_set_root_type(GtLTRdigestPdomVisitor *lv,
                                             const char *type)
{
  const char *interned_type;
  gt_assert(lv && type);
  interned_type = gt_symbol(type);
  lv->root_type = interned_type;
}
Ejemplo n.º 13
0
/* Feature node callback of the LTRdigest protein domain visitor.

   Walks the annotation subgraph rooted at <fn>, remembers the last node of
   type gt_ft_LTR_retrotransposon in <lv>->ltr_retrotrans, extracts its
   sequence via <lv>->rmap, translates it in all three frames on both strands
   (into <lv>->fwd[] and <lv>->rev[]), pipes the six translations in FASTA
   format into a forked `hmmscan` child (argv taken from <lv>->args) and
   hands the child's output to gt_ltrdigest_pdom_visitor_parse_output() and
   gt_ltrdigest_pdom_visitor_process_hits(). Finally
   gt_ltrdigest_pdom_visitor_choose_strand() is called.

   Returns 0 on success and a non-zero value on failure. Failures of
   sequence extraction, fork() and fdopen() are reported via <err>. */
static int gt_ltrdigest_pdom_visitor_feature_node(GtNodeVisitor *nv,
                                                  GtFeatureNode *fn,
                                                  GtError *err)
{
  GtLTRdigestPdomVisitor *lv;
  GtFeatureNodeIterator *fni;
  GtFeatureNode *curnode = NULL;
  int had_err = 0;
  GtRange rng;
  unsigned long i;
  lv = gt_ltrdigest_pdom_visitor_cast(nv);
  gt_assert(lv);
  gt_error_check(err);

  /* traverse annotation subgraph and find LTR element */
  /* NOTE(review): lv->ltr_retrotrans is not cleared in this function before
     the search, so a value left over from an earlier call may still be set
     if this subgraph has no LTR_retrotransposon -- confirm that it is reset
     elsewhere. */
  fni = gt_feature_node_iterator_new(fn);
  while (!had_err && (curnode = gt_feature_node_iterator_next(fni))) {
    if (strcmp(gt_feature_node_get_type(curnode),
               gt_ft_LTR_retrotransposon) == 0) {
      lv->ltr_retrotrans = curnode;
    }
  }
  gt_feature_node_iterator_delete(fni);

  if (!had_err && lv->ltr_retrotrans != NULL) {
    GtCodonIterator *ci;
    GtTranslator *tr;
    GtTranslatorStatus status;
    unsigned long seqlen;
    char translated, *rev_seq;
    FILE *instream;
    GtHMMERParseStatus *pstatus;
    unsigned int frame;
    GtStr *seq;

    seq = gt_str_new();
    rng = gt_genome_node_get_range((GtGenomeNode*) lv->ltr_retrotrans);
    lv->leftLTR_5 = rng.start - 1;
    lv->rightLTR_3 = rng.end - 1;
    seqlen = gt_range_length(&rng);

    had_err = gt_extract_feature_sequence(seq,
                                          (GtGenomeNode*) lv->ltr_retrotrans,
                                          gt_symbol(gt_ft_LTR_retrotransposon),
                                          false, NULL, NULL, lv->rmap, err);

    if (!had_err) {
      /* the per-frame buffers are reused between calls */
      for (i = 0UL; i < 3UL; i++) {
        gt_str_reset(lv->fwd[i]);
        gt_str_reset(lv->rev[i]);
      }

      /* create translations */
      ci = gt_codon_iterator_simple_new(gt_str_get(seq), seqlen, NULL);
      gt_assert(ci);
      tr = gt_translator_new(ci);
      status = gt_translator_next(tr, &translated, &frame, err);
      while (status == GT_TRANSLATOR_OK && translated) {
        gt_str_append_char(lv->fwd[frame], translated);
        status = gt_translator_next(tr, &translated, &frame, NULL);
      }
      if (status == GT_TRANSLATOR_ERROR) had_err = -1;
      if (!had_err) {
        /* translate the reverse complement with the same translator */
        rev_seq = gt_malloc((size_t) seqlen * sizeof (char));
        strncpy(rev_seq, gt_str_get(seq), (size_t) seqlen * sizeof (char));
        (void) gt_reverse_complement(rev_seq, seqlen, NULL);
        gt_codon_iterator_delete(ci);
        ci = gt_codon_iterator_simple_new(rev_seq, seqlen, NULL);
        gt_translator_set_codon_iterator(tr, ci);
        status = gt_translator_next(tr, &translated, &frame, err);
        while (status == GT_TRANSLATOR_OK && translated) {
          gt_str_append_char(lv->rev[frame], translated);
          status = gt_translator_next(tr, &translated, &frame, NULL);
        }
        if (status == GT_TRANSLATOR_ERROR) had_err = -1;
        gt_free(rev_seq);
      }
      gt_codon_iterator_delete(ci);
      gt_translator_delete(tr);
    }

    /* run HMMER and handle results */
    if (!had_err) {
      /* pc: parent -> child (hmmscan stdin), cp: child -> parent (stdout) */
      int pid, pc[2], cp[2];
      GT_UNUSED int rval;

      (void) signal(SIGCHLD, SIG_IGN); /* XXX: for now, ignore child's
                                               exit status */
      rval = pipe(pc);
      gt_assert(rval == 0);
      rval = pipe(cp);
      gt_assert(rval == 0);

      switch ((pid = (int) fork())) {
        case -1:
          /* no child was created: release both pipes and report the problem
             to the caller instead of terminating the whole process */
          (void) close(pc[0]);
          (void) close(pc[1]);
          (void) close(cp[0]);
          (void) close(cp[1]);
          gt_error_set(err, "could not fork a child process to run hmmscan");
          had_err = -1;
          break;
        case 0:    /* child */
          (void) close(1);    /* close current stdout. */
          rval = dup(cp[1]);  /* make stdout go to write end of pipe. */
          (void) close(0);    /* close current stdin. */
          rval = dup(pc[0]);  /* make stdin come from read end of pipe. */
          (void) close(pc[1]);
          (void) close(cp[0]);
          (void) execvp("hmmscan", lv->args); /* XXX: read path from env */
          perror("couldn't execute hmmscan!");
          /* _exit() rather than exit(): the forked child must not run the
             parent's atexit handlers or flush its inherited stdio buffers */
          _exit(1);
        default:    /* parent */
          /* send the six translations as FASTA records named ">0+", ">0-",
             ..., ">2-" */
          for (i = 0UL; i < 3UL; i++) {
            char buf[5];
            GT_UNUSED ssize_t written;
            (void) sprintf(buf, ">%lu%c\n", i, '+');
            written = write(pc[1], buf, 4 * sizeof (char));
            written = write(pc[1], gt_str_get(lv->fwd[i]),
                            (size_t) gt_str_length(lv->fwd[i]) * sizeof (char));
            written = write(pc[1], "\n", 1 * sizeof (char));
            (void) sprintf(buf, ">%lu%c\n", i, '-');
            written = write(pc[1], buf, 4 * sizeof (char));
            written = write(pc[1], gt_str_get(lv->rev[i]),
                            (size_t) gt_str_length(lv->rev[i]) * sizeof (char));
            written = write(pc[1], "\n", 1 * sizeof (char));
          }
          (void) close(pc[1]);
          /* the parent's copy of the child's stdin read end is only released
             after writing (so the writes above behave as before), but it
             must be released, otherwise one descriptor leaks per element */
          (void) close(pc[0]);
          (void) close(cp[1]);
          instream = fdopen(cp[0], "r");
          if (instream == NULL) {
            (void) close(cp[0]);
            gt_error_set(err, "could not open hmmscan output for reading");
            had_err = -1;
          } else {
            pstatus = gt_hmmer_parse_status_new();
            had_err = gt_ltrdigest_pdom_visitor_parse_output(lv, pstatus,
                                                             instream, err);
            (void) fclose(instream);
            if (!had_err) {
              had_err = gt_ltrdigest_pdom_visitor_process_hits(lv, pstatus,
                                                               err);
            }
            gt_hmmer_parse_status_delete(pstatus);
          }
      }
    }
    gt_str_delete(seq);
  }
  if (!had_err)
    had_err = gt_ltrdigest_pdom_visitor_choose_strand(lv);
  return had_err;
}
Ejemplo n.º 14
0
/* Builds one mRNA feature node from the nodes collected for a transcript.

   <key> is the transcript ID (used as the "ID" and "transcript_id"
   attributes of the new node), <value> is a non-empty GtArray of
   GtGenomeNode* belonging to that transcript and <data> is the
   ConstructionInfo. The signature suggests a hashmap iteration callback --
   NOTE(review): confirm against the caller.

   Steps:
   1. Every node carrying GTF_PARSER_STOP_CODON_FLAG is merged into a directly
      adjacent CDS (the CDS range is extended) and then removed from the
      array and released. A stop codon contained in a CDS, or without a
      flanking CDS, is a warning if <cinfo>->tidy is set and an error
      otherwise.
   2. The ranges of the remaining nodes are joined; all of them must agree in
      strand and sequence ID.
   3. The mRNA node is created, the remaining nodes become its children and
      it is appended to <cinfo>->mRNAs.

   Returns 0 on success, -1 (with <err> set) on failure. */
static int construct_mRNAs(GT_UNUSED void *key, void *value, void *data,
                           GtError *err)
{
  ConstructionInfo *cinfo = (ConstructionInfo*) data;
  GtArray *gt_genome_node_array = (GtArray*) value,
          *mRNAs = (GtArray*) cinfo->mRNAs;
  GtGenomeNode *mRNA_node, *first_node, *gn;
  const char *tname;
  GtStrand mRNA_strand;
  GtRange mRNA_range;
  GtStr *mRNA_seqid;
  GtUword i;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(key && value && data);
   /* at least one node in array */
  gt_assert(gt_array_size(gt_genome_node_array));

  /* TODO: support discontinuous start/stop codons */
  i = 0;
  while (!had_err && i < gt_array_size(gt_genome_node_array)) {
    bool removed = false;
    gn = *(GtGenomeNode**) gt_array_get(gt_genome_node_array, i);
    if (gt_feature_node_get_attribute((GtFeatureNode*) gn,
        GTF_PARSER_STOP_CODON_FLAG)) {
      GtUword j;
      GtRange stop_codon_rng = gt_genome_node_get_range(gn);
      bool found_cds = false;
      for (j = 0; !had_err && j < gt_array_size(gt_genome_node_array); j++) {
        GtGenomeNode* gn2;
        GtRange this_rng;
        const char *this_type;
        gn2 = *(GtGenomeNode**) gt_array_get(gt_genome_node_array, j);
        if (gn == gn2) continue;
        this_rng = gt_genome_node_get_range(gn2);
        this_type = gt_feature_node_get_type((GtFeatureNode*) gn2);
        /* feature types are compared by pointer against the gt_symbol()
           result, not by string contents */
        if (this_type == gt_symbol(gt_ft_CDS)) {
          if (gt_range_contains(&this_rng, &stop_codon_rng)) {
            if (cinfo->tidy) {
              gt_warning("stop codon on line %u in file %s is contained in "
                         "CDS in line %u",
                         gt_genome_node_get_line_number(gn),
                         gt_genome_node_get_filename(gn),
                         gt_genome_node_get_line_number(gn2));
              found_cds = true;
            } else {
              gt_error_set(err, "stop codon on line %u in file %s is "
                                "contained in CDS in line %u",
                           gt_genome_node_get_line_number(gn),
                           gt_genome_node_get_filename(gn),
                           gt_genome_node_get_line_number(gn2));
              had_err = -1;
            }
            break;
          }
          /* stop codon directly follows the CDS: extend the CDS end */
          if (this_rng.end + 1 == stop_codon_rng.start) {
            this_rng.end = stop_codon_rng.end;
            gt_genome_node_set_range(gn2, &this_rng);
            found_cds = true;
            break;
          }
          /* stop codon directly precedes the CDS: extend the CDS start */
          if (this_rng.start == stop_codon_rng.end + 1) {
            this_rng.start = stop_codon_rng.start;
            gt_genome_node_set_range(gn2, &this_rng);
            found_cds = true;
            break;
          }
        }
      }
      if (!found_cds) {
        if (!had_err) {
          if (cinfo->tidy) {
            gt_warning("found stop codon on line %u in file %s with no "
                       "flanking CDS, ignoring it",
                       gt_genome_node_get_line_number(gn),
                       gt_genome_node_get_filename(gn));
          } else {
            gt_error_set(err, "found stop codon on line %u in file %s with no "
                              "flanking CDS",
                         gt_genome_node_get_line_number(gn),
                         gt_genome_node_get_filename(gn));
            had_err = -1;
            break;
          }
        }
      } else {
        gt_array_rem(gt_genome_node_array, i);
        gt_genome_node_delete(gn);
        removed = true;
      }
    }
    /* after a removal the following element has moved into slot i, so the
       index must stay put or that element would never be inspected */
    if (!removed)
      i++;
  }

  if (had_err)
    return had_err;

  /* determine the range and the strand of the mRNA; this is done only now
     because the pass above may have released the former first node or
     changed the range of a CDS */
  gt_assert(gt_array_size(gt_genome_node_array));
  first_node = *(GtGenomeNode**) gt_array_get(gt_genome_node_array, 0);
  mRNA_range = gt_genome_node_get_range(first_node);
  mRNA_strand = gt_feature_node_get_strand((GtFeatureNode*) first_node);
  mRNA_seqid = gt_genome_node_get_seqid(first_node);

  for (i = 1; !had_err && i < gt_array_size(gt_genome_node_array); i++) {
    GtRange range;
    GtStrand strand;
    gn = *(GtGenomeNode**) gt_array_get(gt_genome_node_array, i);
    range = gt_genome_node_get_range(gn);
    mRNA_range = gt_range_join(&mRNA_range, &range);
    strand = gt_feature_node_get_strand((GtFeatureNode*) gn);
    if (strand != mRNA_strand) {
      gt_error_set(err, "feature %s on line %u has strand %c, but the "
                        "parent transcript has strand %c",
                   (const char*) key,
                   gt_genome_node_get_line_number(gn),
                   GT_STRAND_CHARS[strand],
                   GT_STRAND_CHARS[mRNA_strand]);
      had_err = -1;
      break;
    } else {
      mRNA_strand = gt_strand_join(mRNA_strand, strand);
    }
    if (!had_err && gt_str_cmp(mRNA_seqid, gt_genome_node_get_seqid(gn))) {
      gt_error_set(err, "The features on lines %u and %u refer to different "
                "genomic sequences (``seqname''), although they have the same "
                "gene IDs (``gene_id'') which must be globally unique",
                gt_genome_node_get_line_number(first_node),
                gt_genome_node_get_line_number(gn));
      had_err = -1;
      break;
    }
  }

  if (!had_err) {
    mRNA_node = gt_feature_node_new(mRNA_seqid, gt_ft_mRNA, mRNA_range.start,
                                    mRNA_range.end, mRNA_strand);
    gt_feature_node_add_attribute(((GtFeatureNode*) mRNA_node), "ID", key);
    gt_feature_node_add_attribute(((GtFeatureNode*) mRNA_node), "transcript_id",
                                  key);

    /* attach the transcript name if a non-empty one was recorded */
    if ((tname = gt_hashmap_get(cinfo->transcript_id_to_name_mapping,
                              (const char*) key)) && strlen(tname) > 0) {
      gt_feature_node_add_attribute((GtFeatureNode*) mRNA_node, GT_GFF_NAME,
                                      tname);
    }

    /* register children */
    for (i = 0; i < gt_array_size(gt_genome_node_array); i++) {
      gn = *(GtGenomeNode**) gt_array_get(gt_genome_node_array, i);
      gt_feature_node_add_child((GtFeatureNode*) mRNA_node,
                                (GtFeatureNode*) gt_genome_node_ref(gn));
    }

    /* store the mRNA */
    gt_array_add(mRNAs, mRNA_node);
  }

  return had_err;
}
/* next() implementation of the SNP annotator stream.

   If the output queue still holds nodes, the next one is returned in <gn>
   right away. Otherwise nodes are pulled from <sas>->merge_stream until
   something can be served, the input ends or an error occurs:

   - SNV/SNP nodes overlapping <sas>->cur_gene_range get an additional
     reference stored in <sas>->snps; any other SNV/SNP triggers processing
     of the current gene cluster and is appended to the output queue.
   - gene nodes either open a cluster, extend the current one (if they
     overlap its range) or trigger processing of the current cluster and
     open a new one; the pulled reference to a gene node is released here.
   - non-feature (meta) nodes trigger processing of the current cluster and
     are appended to the output queue if that succeeded.

   NOTE(review): feature nodes of any other type are neither queued nor
   released by this function.

   Returns 0 on success or the error code of the failing call. */
static int snp_annotator_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                                     GtError *err)
{
  const char *type_snv = gt_symbol(gt_ft_SNV),
             *type_snp = gt_symbol(gt_ft_SNP),
             *type_gene = gt_symbol(gt_ft_gene);
  GtSNPAnnotatorStream *sas;
  GtGenomeNode *node = NULL;
  GtFeatureNode *feature = NULL;
  bool served = false;
  int had_err = 0;

  gt_error_check(err);
  sas = gt_snp_annotator_stream_cast(ns);

  /* buffered output takes precedence over reading new input */
  if (gt_queue_size(sas->outqueue) > 0) {
    *gn = (GtGenomeNode*) gt_queue_get(sas->outqueue);
    return had_err;
  }

  while (!had_err && !served) {
    const char *type;
    GtRange node_range;
    GtGenomeNode *member;

    had_err = gt_node_stream_next(sas->merge_stream, &node, err);

    /* error or end of input */
    if (had_err || !node)
      break;

    feature = gt_feature_node_try_cast(node);
    if (!feature) {
      /* meta node: finish the current cluster, then pass the node on */
      had_err = snp_annotator_stream_process_current_gene(sas, err);
      if (!had_err)
        gt_queue_add(sas->outqueue, node);
      if (gt_queue_size(sas->outqueue) > 0) {
        *gn = (GtGenomeNode*) gt_queue_get(sas->outqueue);
        served = true;
      }
      continue;
    }

    /* feature types are compared by pointer against gt_symbol() results */
    type = gt_feature_node_get_type(feature);
    node_range = gt_genome_node_get_range(node);

    if (type == type_snv || type == type_snp) {
      if (gt_range_overlap(&node_range, &sas->cur_gene_range)) {
        /* SNP inside the current cluster range: keep it for annotation */
        gt_queue_add(sas->snps, gt_genome_node_ref((GtGenomeNode*) feature));
      } else {
        /* SNP outside the cluster: the cluster is complete, start serving */
        gt_assert(gt_queue_size(sas->outqueue) == 0);
        had_err = snp_annotator_stream_process_current_gene(sas, err);
        gt_queue_add(sas->outqueue, node);
        if (gt_queue_size(sas->outqueue) > 0) {
          *gn = (GtGenomeNode*) gt_queue_get(sas->outqueue);
          served = true;
        }
      }
    } else if (type == type_gene) {
      if (gt_array_size(sas->cur_gene_set) == 0UL) {
        /* first gene of a new cluster */
        member = gt_genome_node_ref(node);
        gt_array_add(sas->cur_gene_set, member);
        sas->cur_gene_range = gt_genome_node_get_range(node);
      } else if (gt_range_overlap(&node_range, &sas->cur_gene_range)) {
        /* overlapping gene: grow the current cluster */
        member = gt_genome_node_ref(node);
        gt_array_add(sas->cur_gene_set, member);
        sas->cur_gene_range = gt_range_join(&sas->cur_gene_range, &node_range);
      } else {
        /* disjoint gene: close the current cluster and open a new one */
        had_err = snp_annotator_stream_process_current_gene(sas, err);
        if (!had_err) {
          member = gt_genome_node_ref(node);
          gt_array_add(sas->cur_gene_set, member);
          sas->cur_gene_range = gt_genome_node_get_range(node);
        }
        if (gt_queue_size(sas->outqueue) > 0) {
          *gn = (GtGenomeNode*) gt_queue_get(sas->outqueue);
          served = true;
        }
      }
      /* genes live on in the cluster array only; drop the pulled reference */
      gt_genome_node_delete(node);
    }
  }

  return had_err;
}