static void snp_annotator_stream_free(GtNodeStream *ns)
{
  GtUword i;
  GtSNPAnnotatorStream *sas;
  if (!ns) return;
  sas = gt_snp_annotator_stream_cast(ns);
  gt_region_mapping_delete(sas->rmap);
  while (gt_queue_size(sas->snps) > 0) {
    gt_genome_node_delete((GtGenomeNode*) gt_queue_get(sas->snps));
  }
  while (gt_queue_size(sas->outqueue) > 0) {
    gt_genome_node_delete((GtGenomeNode*) gt_queue_get(sas->outqueue));
  }
  for (i = 0; i < gt_array_size(sas->instreams); i++) {
    gt_node_stream_delete(*(GtNodeStream**) gt_array_get(sas->instreams, i));
  }
  for (i = 0; i < gt_array_size(sas->cur_gene_set); i++) {
    gt_genome_node_delete(*(GtGenomeNode**) gt_array_get(sas->cur_gene_set, i));
  }
  gt_array_delete(sas->cur_gene_set);
  gt_node_stream_delete(sas->merge_stream);
  gt_array_delete(sas->instreams);
  gt_queue_delete(sas->snps);
  gt_queue_delete(sas->outqueue);
}
Exemple #2
0
static int filter_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                              GtError *error)
{
  AgnFilterStream *stream;
  GtFeatureNode *fn;
  int had_err;
  gt_error_check(error);
  stream = filter_stream_cast(ns);

  if(gt_queue_size(stream->cache) > 0)
  {
    *gn = gt_queue_get(stream->cache);
    return 0;
  }

  while(1)
  {
    had_err = gt_node_stream_next(stream->in_stream, gn, error);
    if(had_err)
      return had_err;
    if(!*gn)
      return 0;

    fn = gt_feature_node_try_cast(*gn);
    if(!fn)
      return 0;

    GtFeatureNode *current;
    GtFeatureNodeIterator *iter = gt_feature_node_iterator_new(fn);
    for(current  = gt_feature_node_iterator_next(iter);
        current != NULL;
        current  = gt_feature_node_iterator_next(iter))
    {
      const char *type = gt_feature_node_get_type(current);
      bool keepfeature = false;
      if(gt_hashmap_get(stream->typestokeep, type) != NULL)
        keepfeature = true;

      if(keepfeature)
      {
        gt_genome_node_ref((GtGenomeNode *)current);
        gt_queue_add(stream->cache, current);
      }
    }
    gt_feature_node_iterator_delete(iter);
    gt_genome_node_delete((GtGenomeNode *)fn);
    if(gt_queue_size(stream->cache) > 0)
    {
      *gn = gt_queue_get(stream->cache);
      return 0;
    }
  }

  return 0;
}
GtGenomeNode* gt_orphanage_get_orphan(GtOrphanage *o)
{
  gt_assert(o);
  if (gt_queue_size(o->orphans))
    return gt_queue_get(o->orphans);
  return NULL;
}
void gt_desc_buffer_reset(GtDescBuffer *db)
{
  GtUword laststartpos;
  gt_assert(db);

  if (!db->dirty) return;
  if (gt_queue_size(db->startqueue) == 0) {
    db->length = 0;
    db->dirty = false;
    return;
  }
  laststartpos = (GtUword) gt_queue_head(db->startqueue);
  if (laststartpos != 0) {
    laststartpos = (GtUword) gt_queue_get(db->startqueue);
    db->length = db->length - laststartpos;
    if (db->length >= laststartpos) {
      /* strings overlap */
      memmove(db->buf, db->buf + laststartpos, db->length * sizeof (char));
    } else {
      /* no overlap */
      memcpy(db->buf, db->buf + laststartpos, db->length * sizeof (char));
    }
    gt_queue_add(db->startqueue, (void*) 0);
  }
  db->dirty = false;
}
Exemple #5
0
int gt_queue_iterate_reverse(GtQueue *q, GtQueueProcessor gt_queue_processor,
                             void *info, GtError *err)
{
  long i;
  int rval;
  gt_error_check(err);
  gt_assert(q && gt_queue_processor);
  if (gt_queue_size(q)) {
    if (q->front < q->back) { /* no wraparound */
      for (i = q->back-1; i >= q->front; i--) {
        if ((rval = gt_queue_processor(q->contents + i, info, err)))
          return rval;
      }
    }
    else { /* wraparound */
      for (i = q->back-1; i >= 0; i--) {
        if ((rval = gt_queue_processor(q->contents + i, info, err)))
          return rval;
      }
      for (i = q->size-1; i >= q->front; i--) {
        if ((rval = gt_queue_processor(q->contents +i, info, err))) return rval;
      }
    }
  }
  return 0;
}
static int snp_annotator_stream_process_current_gene(GtSNPAnnotatorStream *sas,
                                                     GtError *err)
{
  int had_err = 0;
  GtUword i;
  GtUword nof_genes = gt_array_size(sas->cur_gene_set);
  gt_error_check(err);

  if (gt_queue_size(sas->snps) > 0) {
    /* we need to process SNPs for a gene cluster*/
    gt_assert(gt_queue_size(sas->outqueue) == 0);

    for (i = 0; !had_err && i < nof_genes; i++) {
      GtNodeVisitor *sav;
      GtFeatureNode *gene;
      gene = *(GtFeatureNode**) gt_array_get(sas->cur_gene_set, i);
      sav = gt_snp_annotator_visitor_new(gene, sas->tt, sas->rmap, err);
      if (!sav)
        had_err = -1;
      if (!had_err) {
        if (i < nof_genes-1) {
          had_err = gt_queue_iterate(sas->snps,
                                     snp_annotator_stream_process_snp,
                                     sav, err);
        } else {
          while (!had_err && gt_queue_size(sas->snps) > 0) {
            GtFeatureNode *snp = (GtFeatureNode*) gt_queue_get(sas->snps);
            had_err = gt_genome_node_accept((GtGenomeNode*) snp, sav, err);
            gt_queue_add(sas->outqueue, snp);
            gt_genome_node_delete((GtGenomeNode*) snp);
          }
        }
        gt_node_visitor_delete(sav);
      }
      gt_genome_node_delete((GtGenomeNode*) gene);
    }
  } else {
    /* no SNPs for this gene cluster, delete it */
    for (i = 0; !had_err && i < nof_genes; i++) {
      gt_genome_node_delete(*(GtGenomeNode**) gt_array_get(sas->cur_gene_set,
                                                           i));
    }
  }
  gt_assert(gt_queue_size(sas->snps) == 0);
  gt_array_reset(sas->cur_gene_set);
  return had_err;
}
Exemple #7
0
static void buffer_stream_free(GtNodeStream *ns)
{
  GtBufferStream *bs = buffer_stream_cast(ns);
  while (gt_queue_size(bs->node_buffer))
    gt_genome_node_delete(gt_queue_get(bs->node_buffer));
  gt_queue_delete(bs->node_buffer);
  gt_node_stream_delete(bs->in_stream);
}
void gt_orphanage_reset(GtOrphanage *o)
{
  gt_assert(o);
  while (gt_queue_size(o->orphans))
    gt_genome_node_delete(gt_queue_get(o->orphans));
  gt_cstr_table_reset(o->missing_parents);
  gt_cstr_table_reset(o->orphan_ids);
}
static void gtf_in_stream_free(GtNodeStream *ns)
{
  GtGTFInStream *gtf_in_stream = gtf_in_stream_cast(ns);
  gt_free(gtf_in_stream->filename);
  gt_type_checker_delete(gtf_in_stream->type_checker);
  while (gt_queue_size(gtf_in_stream->genome_node_buffer))
    gt_genome_node_delete(gt_queue_get(gtf_in_stream->genome_node_buffer));
  gt_queue_delete(gtf_in_stream->genome_node_buffer);
}
Exemple #10
0
static void queue_wrap(GtQueue *q, bool free_contents)
{
  gt_assert(q);
  if (free_contents) {
    while (gt_queue_size(q))
      gt_free(gt_queue_get(q));
  }
  gt_free(q->contents);
  gt_free(q);
}
void gt_orphanage_delete(GtOrphanage *o)
{
  if (!o) return;
  gt_cstr_table_delete(o->orphan_ids);
  gt_cstr_table_delete(o->missing_parents);
  while (gt_queue_size(o->orphans))
    gt_genome_node_delete(gt_queue_get(o->orphans));
  gt_queue_delete(o->orphans);
  gt_free(o);
}
static void feature_in_stream_free(GtNodeStream *ns)
{
  GtFeatureInStream *stream = feature_in_stream_cast(ns);
  gt_str_array_delete(stream->seqids);
  while (gt_queue_size(stream->regioncache) > 0)
  {
    GtGenomeNode *gn = gt_queue_get(stream->regioncache);
    gt_genome_node_delete(gn);
  }
  gt_queue_delete(stream->regioncache);
}
Exemple #13
0
void* gt_queue_get(GtQueue *q)
{
  void *contents;
  gt_assert(q && gt_queue_size(q));
  /* get contents */
  contents = q->contents[q->front++];
  /* adjust indices */
  if (q->front == q->back)
    q->front = q->back = 0; /* reset */
  else if (q->front == q->size)
    q->front = 0; /* wraparound */
  return contents;
}
Exemple #14
0
static void gff3_in_stream_plain_free(GtNodeStream *ns)
{
  GtGFF3InStreamPlain *gff3_in_stream_plain = gff3_in_stream_plain_cast(ns);
  gt_str_array_delete(gff3_in_stream_plain->files);
  gt_str_delete(gff3_in_stream_plain->stdinstr);
  while (gt_queue_size(gff3_in_stream_plain->genome_node_buffer)) {
    gt_genome_node_delete(gt_queue_get(gff3_in_stream_plain
                                       ->genome_node_buffer));
  }
  gt_queue_delete(gff3_in_stream_plain->genome_node_buffer);
  gt_gff3_parser_delete(gff3_in_stream_plain->gff3_parser);
  gt_cstr_table_delete(gff3_in_stream_plain->used_types);
  gt_file_delete(gff3_in_stream_plain->fpin);
}
static int gtf_in_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                              GT_UNUSED GtError *err)
{
  GtGTFInStream *is;
  int had_err = 0;
  gt_error_check(err);
  is = gtf_in_stream_cast(ns);
  if (!is->file_processed) {
    had_err = gtf_in_stream_process_file(is, err);
    is->file_processed = true;
  }
  if (!had_err && gt_queue_size(is->genome_node_buffer)) {
    /* we still have a node in the buffer -> serve it from there */
    *gn = gt_queue_get(is->genome_node_buffer);
    return 0;
  }
  if (!had_err) {
    /* the buffer is empty */
    gt_assert(!gt_queue_size(is->genome_node_buffer));
    *gn = NULL;
  }
  return had_err;
}
Exemple #16
0
void gt_queue_remove(GtQueue *q, void *elem)
{
  long i, elemidx;
  gt_assert(q &&  gt_queue_size(q));
  if (q->front < q->back) { /* no wraparound */
    for (i = q->back-1; i >= q->front; i--) {
      if (q->contents[i] == elem)
        break;
    }
    elemidx = i;
    gt_assert(elemidx >= q->front); /* valid element found */
    for (i = elemidx+1; i < q->back; i++)
      q->contents[i-1] = q->contents[i];
    q->contents[q->back-1] = NULL;
    q->back--;
    if (q->front == q->back)
      q->front = q->back = 0; /* reset */
  }
  else { /* wraparound */
    for (i = q->back-1; i >= 0; i--) {
      if (q->contents[i] == elem)
        break;
    }
    elemidx = i;
    if (elemidx >= 0) { /* element found */
      for (i = elemidx+1; i < q->back; i++)
        q->contents[i-1] = q->contents[i];
      q->contents[q->back-1] = NULL;
      q->back--;
      if (q->back == 0) q->back = q->size;
      return;
    }
    for (i = q->size-1; i >= q->front; i--) {
      if (q->contents[i] == elem)
        break;
    }
    elemidx = i;
    gt_assert(elemidx >= q->front); /* valid element found */
    for (i = elemidx+1; i < q->size; i++)
      q->contents[i-1] = q->contents[i];
    q->contents[q->size-1] = q->contents[0];
    for (i = 1; i < q->back; i++)
      q->contents[i-1] = q->contents[i];
    q->contents[q->back-1] = NULL;
    q->back--;
    if (q->back == 0)
      q->back = q->size;
  }
}
Exemple #17
0
void gt_bed_parser_delete(GtBEDParser *bed_parser)
{
  if (!bed_parser) return;
  gt_free(bed_parser->block_type);
  gt_free(bed_parser->thick_feature_type);
  gt_free(bed_parser->feature_type);
  gt_str_delete(bed_parser->another_word);
  gt_str_delete(bed_parser->word);
  gt_hashmap_delete(bed_parser->seqid_to_str_mapping);
  while (gt_queue_size(bed_parser->feature_nodes))
    gt_genome_node_delete(gt_queue_get(bed_parser->feature_nodes));
  gt_queue_delete(bed_parser->feature_nodes);
  gt_region_node_builder_delete(bed_parser->region_node_builder);
  gt_free(bed_parser);
}
Exemple #18
0
static int buffer_stream_next(GtNodeStream *ns, GtGenomeNode **gn, GtError *err)
{
  GtBufferStream *bs;
  gt_error_check(err);
  bs = buffer_stream_cast(ns);
  if (bs->buffering) {
    int had_err = gt_node_stream_next(bs->in_stream, gn, err);
    if (!had_err && *gn)
      gt_queue_add(bs->node_buffer, gt_genome_node_ref(*gn));
    return had_err;
  }
  else {
    *gn = gt_queue_size(bs->node_buffer) ? gt_queue_get(bs->node_buffer) : NULL;
    return 0;
  }
}
static int feature_in_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                                   GtError *error)
{
  GtFeatureInStream *stream = feature_in_stream_cast(ns);
  gt_error_check(error);

  if (!stream->init)
  {
    feature_in_stream_init(stream);
    stream->init = true;
  }

  if (gt_queue_size(stream->regioncache) > 0)
  {
    GtGenomeNode *region = gt_queue_get(stream->regioncache);
    *gn = region;
    return 0;
  }

  if (stream->featurecache == NULL || gt_array_size(stream->featurecache) == 0)
  {
    if (stream->featurecache != NULL)
    {
      gt_array_delete(stream->featurecache);
      stream->featurecache = NULL;
    }

    if (stream->seqindex == gt_str_array_size(stream->seqids))
    {
      *gn = NULL;
      return 0;
    }

    const char *seqid = gt_str_array_get(stream->seqids, stream->seqindex++);
    stream->featurecache = gt_feature_index_get_features_for_seqid(stream->fi,
                                                                   seqid,
                                                                   error);
    gt_array_sort(stream->featurecache, (GtCompare)gt_genome_node_compare);
    gt_array_reverse(stream->featurecache);
  }

  GtGenomeNode *feat = *(GtGenomeNode **)gt_array_pop(stream->featurecache);
  *gn = gt_genome_node_ref(feat);
  return 0;
}
Exemple #20
0
int gt_bed_parser_parse(GtBEDParser *bed_parser, GtQueue *genome_nodes,
                        const char *filename, GtError *err)
{
  GtIO *bed_file;
  int had_err;
  gt_error_check(err);
  gt_assert(bed_parser && genome_nodes);
  bed_file = gt_io_new(filename, "r");
  /* parse BED file */
  had_err = parse_bed_file(bed_parser, bed_file, err);
  /* process created region and feature nodes */
  gt_region_node_builder_build(bed_parser->region_node_builder, genome_nodes);
  gt_region_node_builder_reset(bed_parser->region_node_builder);
  while (gt_queue_size(bed_parser->feature_nodes))
    gt_queue_add(genome_nodes, gt_queue_get(bed_parser->feature_nodes));
  gt_io_delete(bed_file);
  return had_err;
}
static int gff3_numsorted_out_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                                           GtError *err)
{
  GtGFF3NumsortedOutStream *gff3_out_stream;
  int had_err = 0;
  GtUword i = 0;
  gt_error_check(err);
  gff3_out_stream = gff3_numsorted_out_stream_cast(ns);
  if (!gff3_out_stream->outqueue) {
    gff3_out_stream->outqueue = gt_queue_new();
    while (!(had_err =
                    gt_node_stream_next(gff3_out_stream->in_stream, gn, err))) {
      if (!*gn) break;
      gt_array_add(gff3_out_stream->buffer, *gn);
    }
    if (!had_err) {
      gt_genome_nodes_sort_stable_with_func(gff3_out_stream->buffer,
                             (GtCompare) gt_genome_node_compare_numeric_seqids);
      for (i = 0; !had_err && i < gt_array_size(gff3_out_stream->buffer); i++) {
        GtGenomeNode *mygn = *(GtGenomeNode**)
                                       gt_array_get(gff3_out_stream->buffer, i);
        gt_queue_add(gff3_out_stream->outqueue, mygn);
      }
    }
  }
  if (gff3_out_stream->outqueue && !had_err) {
    if (gt_queue_size(gff3_out_stream->outqueue) > 0) {
      GtGenomeNode *mygn = (GtGenomeNode*)
                                        gt_queue_get(gff3_out_stream->outqueue);
      gt_assert(mygn);
      had_err = gt_genome_node_accept(mygn, gff3_out_stream->gff3_visitor, err);
      if (!had_err)
        *gn = mygn;
    }
  }
  return had_err;
}
Exemple #22
0
static int gff3_in_stream_plain_next(GtNodeStream *ns, GtGenomeNode **gn,
                                     GtError *err)
{
  GtGFF3InStreamPlain *is = gff3_in_stream_plain_cast(ns);
  GtStr *filenamestr;
  int had_err = 0, status_code;

  gt_error_check(err);

  if (gt_queue_size(is->genome_node_buffer) > 1) {
    /* we still have at least two nodes in the buffer -> serve from there */
    *gn = gt_queue_get(is->genome_node_buffer);
    return 0;
  }

  /* the buffer is empty or has one element */
  gt_assert(gt_queue_size(is->genome_node_buffer) <= 1);

  for (;;) {
    /* open file if necessary */
    if (!is->file_is_open) {
      if (gt_str_array_size(is->files) &&
          is->next_file == gt_str_array_size(is->files)) {
        break;
      }
      if (gt_str_array_size(is->files)) {
        if (strcmp(gt_str_array_get(is->files, is->next_file), "-") == 0) {
          if (is->stdin_argument) {
            gt_error_set(err, "multiple specification of argument file \"-\"");
            had_err = -1;
            break;
          }
          is->fpin = gt_file_xopen(NULL, "r");
          is->file_is_open = true;
          is->stdin_argument = true;
        }
        else {
          is->fpin = gt_file_xopen(gt_str_array_get(is->files,
                                                       is->next_file), "r");
          is->file_is_open = true;
        }
        is->next_file++;
      }
      else {
        if (is->stdin_processed)
          break;
        is->fpin = NULL;
        is->file_is_open = true;
      }
      is->line_number = 0;

      if (!had_err && is->progress_bar) {
        printf("processing file \"%s\"\n", gt_str_array_size(is->files)
               ? gt_str_array_get(is->files, is->next_file-1) : "stdin");
      }
      if (!had_err && is->fpin && is->progress_bar) {
        gt_progressbar_start(&is->line_number,
                            gt_file_number_of_lines(gt_str_array_get(is->files,
                                                             is->next_file-1)));
      }
    }

    gt_assert(is->file_is_open);

    filenamestr = gt_str_array_size(is->files)
                  ? gt_str_array_get_str(is->files, is->next_file-1)
                  : is->stdinstr;
    /* read two nodes */
    had_err = gt_gff3_parser_parse_genome_nodes(is->gff3_parser, &status_code,
                                                is->genome_node_buffer,
                                                is->used_types, filenamestr,
                                                &is->line_number, is->fpin,
                                                err);
    if (had_err)
      break;
    if (status_code != EOF) {
      had_err = gt_gff3_parser_parse_genome_nodes(is->gff3_parser, &status_code,
                                                  is->genome_node_buffer,
                                                  is->used_types, filenamestr,
                                                  &is->line_number, is->fpin,
                                                  err);
      if (had_err)
        break;
    }

    if (status_code == EOF) {
      /* end of current file */
      if (is->progress_bar) gt_progressbar_stop();
      gt_file_delete(is->fpin);
      is->fpin = NULL;
      is->file_is_open = false;
      gt_gff3_parser_reset(is->gff3_parser);
      if (!gt_str_array_size(is->files)) {
        is->stdin_processed = true;
        break;
      }
      continue;
    }

    gt_assert(gt_queue_size(is->genome_node_buffer));

    /* make sure the parsed nodes are sorted */
    if (is->ensure_sorting && gt_queue_size(is->genome_node_buffer) > 1) {
      GtGenomeNode *last_node = NULL;
      /* a sorted stream can have at most one input file */
      gt_assert(gt_str_array_size(is->files) == 0 ||
                gt_str_array_size(is->files) == 1);
      had_err = gt_queue_iterate(is->genome_node_buffer, buffer_is_sorted,
                                 &last_node, err);
    }
    if (!had_err) {
      *gn = gt_queue_get(is->genome_node_buffer);
    }
    return had_err;
  }
  gt_assert(!gt_queue_size(is->genome_node_buffer));
  *gn = NULL;
  return had_err;
}
bool agn_infer_cds_visitor_unit_test(AgnUnitTest *test)
{
  GtQueue *queue = gt_queue_new();
  infer_cds_visitor_test_data(queue);
  agn_assert(gt_queue_size(queue) == 4);

  GtFeatureNode *fn = gt_queue_get(queue);
  GtArray *cds = agn_typecheck_select(fn, agn_typecheck_cds);
  bool grape1 = (gt_array_size(cds) == 4);
  if(grape1)
  {
    GtGenomeNode *cds2 = *(GtGenomeNode **)gt_array_get(cds, 1);
    GtRange range = gt_genome_node_get_range(cds2);
    grape1 = (range.start == 349 && range.end == 522);
  }
  agn_unit_test_result(test, "grape test sans UTRs", grape1);
  gt_genome_node_delete((GtGenomeNode *)fn);
  gt_array_delete(cds);

  fn = gt_queue_get(queue);
  cds = agn_typecheck_select(fn, agn_typecheck_cds);
  bool grape2 = (gt_array_size(cds) == 1);
  if(grape2)
  {
    GtGenomeNode *cds1 = *(GtGenomeNode **)gt_array_get(cds, 0);
    GtRange range = gt_genome_node_get_range(cds1);
    GtStrand strand = gt_feature_node_get_strand((GtFeatureNode *)cds1);
    grape2 = (range.start == 10747 && range.end == 11577 &&
              strand == GT_STRAND_REVERSE);
  }
  agn_unit_test_result(test, "grape test with UTRs, strand check", grape2);
  gt_genome_node_delete((GtGenomeNode *)fn);
  gt_array_delete(cds);

  fn = gt_queue_get(queue);
  cds = agn_typecheck_select(fn, agn_typecheck_cds);
  bool grape3 = (gt_array_size(cds) == 2);
  if(grape3)
  {
    GtGenomeNode *cds2 = *(GtGenomeNode **)gt_array_get(cds, 1);
    GtRange range = gt_genome_node_get_range(cds2);
    grape3 = (range.start == 22651 && range.end == 23022);
  }
  agn_unit_test_result(test, "grape test 3", grape3);
  gt_genome_node_delete((GtGenomeNode *)fn);
  gt_array_delete(cds);

  fn = gt_queue_get(queue);
  cds = agn_typecheck_select(fn, agn_typecheck_cds);
  bool grape4 = (gt_array_size(cds) == 12);
  if(grape4)
  {
    GtGenomeNode *cds7 = *(GtGenomeNode **)gt_array_get(cds, 6);
    GtRange range = gt_genome_node_get_range(cds7);
    grape4 = (range.start == 27956 && range.end == 27996);
  }
  agn_unit_test_result(test, "grape test 4", grape4);
  gt_genome_node_delete((GtGenomeNode *)fn);
  gt_array_delete(cds);

  while(gt_queue_size(queue) > 0)
  {
    GtGenomeNode *cds_n = gt_queue_get(queue);
    gt_genome_node_delete(cds_n);
  }
  gt_queue_delete(queue);
  return agn_unit_test_success(test);
}
Exemple #24
0
void* gt_queue_head(GtQueue *q)
{
  gt_assert(q && gt_queue_size(q));
  return q->contents[q->front];
}
static int snp_annotator_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                                     GtError *err)
{
  GtSNPAnnotatorStream *sas;
  int had_err = 0;
  bool complete_cluster = false;
  GtGenomeNode *mygn = NULL;
  GtFeatureNode *fn = NULL;
  const char *snv_type = gt_symbol(gt_ft_SNV),
             *snp_type = gt_symbol(gt_ft_SNP),
             *gene_type = gt_symbol(gt_ft_gene);
  gt_error_check(err);
  sas = gt_snp_annotator_stream_cast(ns);

  /* if there are still SNPs left in the buffer, output them */
  if (gt_queue_size(sas->outqueue) > 0) {
    *gn = (GtGenomeNode*) gt_queue_get(sas->outqueue);
    return had_err;
  } else complete_cluster = false;

  while (!had_err && !complete_cluster) {
    had_err = gt_node_stream_next(sas->merge_stream, &mygn, err);

    /* stop if stream is at the end */
    if (had_err || !mygn) break;

    /* process all feature nodes */
    if ((fn = gt_feature_node_try_cast(mygn))) {
      GtGenomeNode *addgn;
      const char *type = gt_feature_node_get_type(fn);
      GtRange new_rng = gt_genome_node_get_range(mygn);
      if (type == snv_type || type == snp_type) {
        /* -----> this is a SNP <----- */
        if (gt_range_overlap(&new_rng, &sas->cur_gene_range)) {
          /* it falls into the currently observed range */
          gt_queue_add(sas->snps, gt_genome_node_ref((GtGenomeNode*) fn));
        } else {
          /* SNP outside a gene, this cluster is done
             add to out queue and start serving */
          gt_assert(gt_queue_size(sas->outqueue) == 0);
          had_err = snp_annotator_stream_process_current_gene(sas, err);
          gt_queue_add(sas->outqueue, mygn);
          if (gt_queue_size(sas->outqueue) > 0) {
            *gn = (GtGenomeNode*) gt_queue_get(sas->outqueue);
            complete_cluster = true;
          }
        }
      } else if (type == gene_type) {
        /* -----> this is a gene <----- */
        if (gt_array_size(sas->cur_gene_set) == 0UL) {
          /* new overlapping gene cluster */
          addgn = gt_genome_node_ref(mygn);
          gt_array_add(sas->cur_gene_set, addgn);
          sas->cur_gene_range = gt_genome_node_get_range(mygn);
        } else {
          if (gt_range_overlap(&new_rng, &sas->cur_gene_range)) {
            /* gene overlaps with current one, add to cluster */
            addgn = gt_genome_node_ref(mygn);
            gt_array_add(sas->cur_gene_set, addgn);
            sas->cur_gene_range = gt_range_join(&sas->cur_gene_range, &new_rng);
          } else {
            /* finish current cluster and start a new one */
            had_err = snp_annotator_stream_process_current_gene(sas, err);
            if (!had_err) {
              addgn = gt_genome_node_ref(mygn);
              gt_array_add(sas->cur_gene_set, addgn);
              sas->cur_gene_range = gt_genome_node_get_range(mygn);
            }
            if (gt_queue_size(sas->outqueue) > 0) {
              *gn = (GtGenomeNode*) gt_queue_get(sas->outqueue);
              complete_cluster = true;
            }
          }
        }
        /* from now on, genes are kept in gene cluster arrays only */
        gt_genome_node_delete(mygn);
      }
    } else {
      /* meta node */
      had_err = snp_annotator_stream_process_current_gene(sas, err);
      if (!had_err) {
        gt_queue_add(sas->outqueue, mygn);
      }
      if (gt_queue_size(sas->outqueue) > 0) {
        *gn = (GtGenomeNode*) gt_queue_get(sas->outqueue);
        complete_cluster = true;
      }
    }
  }

  return had_err;
}
Exemple #26
0
int gt_queue_unit_test(GtError *err)
{
  long check_counter = 0, check_counter_reverse = 1023;
  unsigned long i;
  int had_err = 0;
  GtQueue *q;

  gt_error_check(err);

  /* without wraparound */
  q = gt_queue_new();
  gt_ensure(had_err, !gt_queue_size(q));
  for (i = 0; !had_err && i < 1024; i++) {
    gt_queue_add(q, (void*) i);
    gt_ensure(had_err, gt_queue_size(q) == i + 1);
  }
  if (!had_err)
    had_err = gt_queue_iterate(q, check_queue, &check_counter, err);
  if (!had_err) {
    had_err = gt_queue_iterate_reverse(q, check_queue_reverse,
                                    &check_counter_reverse, err);
  }
  gt_ensure(had_err, gt_queue_iterate(q, fail_func, NULL, NULL));
  gt_ensure(had_err, gt_queue_iterate_reverse(q, fail_func, NULL, NULL));
  if (!had_err) {
    gt_queue_remove(q, (void*) 0);
    gt_ensure(had_err, gt_queue_size(q) == 1023);
  }
  for (i = 1; !had_err && i < 1024; i++) {
    gt_ensure(had_err, gt_queue_head(q) == (void*) i);
    gt_ensure(had_err, gt_queue_get(q) == (void*) i);
    gt_ensure(had_err, gt_queue_size(q) == 1024 - i - 1);
  }
  gt_ensure(had_err, !gt_queue_size(q));
  gt_queue_delete(q);

  /* with wraparound (without full queue) */
  if (!had_err) {
    q = gt_queue_new();
    gt_ensure(had_err, !gt_queue_size(q));
    for (i = 0; !had_err && i < 1024; i++) {
      gt_queue_add(q, (void*) i);
      gt_ensure(had_err, gt_queue_size(q) == i + 1);
    }
    check_counter = 0;
    check_counter_reverse = 1023;
    if (!had_err)
      had_err = gt_queue_iterate(q, check_queue, &check_counter, err);
    gt_ensure(had_err, gt_queue_iterate(q, fail_func, NULL, NULL));
    gt_ensure(had_err, gt_queue_iterate_reverse(q, fail_func, NULL, NULL));
    if (!had_err) {
      had_err = gt_queue_iterate_reverse(q, check_queue_reverse,
                                         &check_counter_reverse, err);
    }
    for (i = 0; !had_err && i < 512; i++) {
      gt_ensure(had_err, gt_queue_head(q) == (void*) i);
      gt_ensure(had_err, gt_queue_get(q) == (void*) i);
      gt_ensure(had_err, gt_queue_size(q) == 1024 - i - 1);
    }
    for (i = 0; !had_err && i < 512; i++) {
      gt_queue_add(q, (void*) (i + 1024));
      gt_ensure(had_err, gt_queue_size(q) == 512 + i + 1);
    }
    check_counter = 512;
    check_counter_reverse = 1535;
    if (!had_err)
      had_err = gt_queue_iterate(q, check_queue, &check_counter, err);
    if (!had_err) {
      had_err = gt_queue_iterate_reverse(q, check_queue_reverse,
                                         &check_counter_reverse, err);
    }
    gt_ensure(had_err, gt_queue_iterate(q, fail_func, NULL, NULL));
    gt_ensure(had_err, gt_queue_iterate_reverse(q, fail_func, NULL, NULL));
    if (!had_err) {
      gt_queue_remove(q, (void*) 512);
      gt_ensure(had_err, gt_queue_size(q) == 1023);
    }
    for (i = 1; !had_err && i < 1024; i++) {
      gt_ensure(had_err, gt_queue_head(q) == (void*) (512 + i));
      gt_ensure(had_err, gt_queue_get(q) == (void*) (512 + i));
      gt_ensure(had_err, gt_queue_size(q) == 1024 - i - 1);
    }
    gt_ensure(had_err, !gt_queue_size(q));
    gt_queue_delete(q);
  }

  /* with wraparound (with full queue) */
  if (!had_err) {
    q = gt_queue_new();
    gt_ensure(had_err, !gt_queue_size(q));
    for (i = 0; !had_err && i < 1024; i++) {
      gt_queue_add(q, (void*) i);
      gt_ensure(had_err, gt_queue_size(q) == i + 1);
    }
    check_counter = 0;
    check_counter_reverse = 1023;
    if (!had_err)
      had_err = gt_queue_iterate(q, check_queue, &check_counter, err);
    if (!had_err) {
      had_err = gt_queue_iterate_reverse(q, check_queue_reverse,
                                      &check_counter_reverse, err);
    }
    gt_ensure(had_err, gt_queue_iterate(q, fail_func, NULL, NULL));
    gt_ensure(had_err, gt_queue_iterate_reverse(q, fail_func, NULL, NULL));
    for (i = 0; !had_err && i < 512; i++) {
      gt_ensure(had_err, gt_queue_head(q) == (void*) i);
      gt_ensure(had_err, gt_queue_get(q) == (void*) i);
      gt_ensure(had_err, gt_queue_size(q) == 1024 - i - 1);
    }
    for (i = 0; !had_err && i < 1024; i++) {
      gt_queue_add(q, (void*) (i + 1024));
      gt_ensure(had_err, gt_queue_size(q) == 512 + i + 1);
    }
    check_counter = 512;
    check_counter_reverse = 2047;
    if (!had_err)
      had_err = gt_queue_iterate(q, check_queue, &check_counter, err);
    if (!had_err) {
      had_err = gt_queue_iterate_reverse(q, check_queue_reverse,
                                      &check_counter_reverse, err);
    }
    gt_ensure(had_err, gt_queue_iterate(q, fail_func, NULL, NULL));
    gt_ensure(had_err, gt_queue_iterate_reverse(q, fail_func, NULL, NULL));
    if (!had_err) {
      gt_queue_remove(q, (void*) 512);
      gt_ensure(had_err, gt_queue_size(q) == 1535);
    }
    for (i = 1; !had_err && i < 1536; i++) {
      gt_ensure(had_err, gt_queue_head(q) == (void*) (512 + i));
      gt_ensure(had_err, gt_queue_get(q) == (void*) (512 + i));
      gt_ensure(had_err, gt_queue_size(q) == 1536 - i - 1);
    }
    gt_ensure(had_err, !gt_queue_size(q));
    gt_queue_delete(q);
  }

  /* test a corner case */
  if (!had_err) {
    q = gt_queue_new();
    gt_queue_add(q, (void*) 1);
    gt_ensure(had_err, gt_queue_size(q) == 1);
    if (!had_err)
      gt_queue_add(q, (void*) 1);
    gt_ensure(had_err, gt_queue_size(q) == 2);
    gt_ensure(had_err, gt_queue_get(q));
    gt_ensure(had_err, gt_queue_size(q) == 1);
    if (!had_err)
      gt_queue_add(q, (void*) 1);
    gt_ensure(had_err, gt_queue_size(q) == 2);
    gt_ensure(had_err, gt_queue_get(q));
    gt_ensure(had_err, gt_queue_size(q) == 1);
    if (!had_err)
      gt_queue_add(q, (void*) 1);
    gt_ensure(had_err, gt_queue_size(q) == 2);
    gt_ensure(had_err, gt_queue_get(q));
    gt_ensure(had_err, gt_queue_size(q) == 1);
    gt_ensure(had_err, gt_queue_get(q));
    gt_ensure(had_err, gt_queue_size(q) == 0);
    if (!had_err)
      gt_queue_add(q, (void*) 1);
    gt_ensure(had_err, gt_queue_size(q) == 1);
    gt_ensure(had_err, gt_queue_get(q));
    gt_ensure(had_err, gt_queue_size(q) == 0);
    gt_queue_delete(q);
  }

  /* gt_queue_remove() corner case */
  if (!had_err) {
    q = gt_queue_new();
    gt_queue_add(q, (void*) 1);
    gt_ensure(had_err, gt_queue_size(q) == 1);
    gt_queue_remove(q, (void*) 1);
    gt_ensure(had_err, gt_queue_size(q) == 0);
    gt_queue_delete(q);
  }

  /* gt_queue_remove() corner case */
  if (!had_err) {
    q = gt_queue_new();
    gt_queue_add(q, (void*) 0);
    gt_queue_add(q, (void*) 1);
    gt_queue_add(q, (void*) 2);
    gt_queue_add(q, (void*) 3);
    gt_ensure(had_err, gt_queue_get(q) == (void*) 0);
    gt_ensure(had_err, gt_queue_get(q) == (void*) 1);
    gt_queue_add(q, (void*) 4);
    gt_queue_add(q, (void*) 5);
    gt_queue_remove(q, (void*) 4);
    gt_queue_remove(q, (void*) 2);
    gt_queue_remove(q, (void*) 5);
    gt_queue_remove(q, (void*) 3);
    gt_ensure(had_err, gt_queue_size(q) == 0);
    gt_queue_delete(q);
  }

  /* delete with contents */
  if (!had_err) {
    q = gt_queue_new();
    gt_ensure(had_err, !gt_queue_size(q));
    if (!had_err)
      gt_queue_add(q, gt_calloc(1, 16));
    gt_ensure(had_err, gt_queue_size(q) == 1);
    if (!had_err)
      gt_queue_add(q, gt_calloc(1, 32));
    gt_ensure(had_err, gt_queue_size(q) == 2);
    gt_queue_delete_with_contents(q);
  }

  return had_err;
}
Exemple #27
0
// Main method
int main(int argc, char * const *argv)
{
  GtError *error;
  GtLogger *logger;
  GtQueue *streams;
  GtNodeStream *stream, *last_stream;
  CanonGFF3Options options = { NULL, NULL, false };

  gt_lib_init();
  error = gt_error_new();
  canon_gff3_parse_options(argc, argv + 0, &options, error);

  streams = gt_queue_new();
  logger = gt_logger_new(true, "", stderr);

  stream = gt_gff3_in_stream_new_unsorted(argc - optind, (const char **)
                                                          argv+optind);
  gt_gff3_in_stream_check_id_attributes((GtGFF3InStream *)stream);
  gt_gff3_in_stream_enable_tidy_mode((GtGFF3InStream *)stream);
  gt_queue_add(streams, stream);
  last_stream = stream;

  if(options.infer)
  {
    GtHashmap *type_parents = gt_hashmap_new(GT_HASH_STRING, gt_free_func,
                                             gt_free_func);
    gt_hashmap_add(type_parents, gt_cstr_dup("mRNA"), gt_cstr_dup("gene"));
    gt_hashmap_add(type_parents, gt_cstr_dup("tRNA"), gt_cstr_dup("gene"));
    stream = agn_infer_parent_stream_new(last_stream,
                                                 type_parents);
    gt_hashmap_delete(type_parents);
    gt_queue_add(streams, stream);
    last_stream = stream;
  }

  stream = agn_gene_stream_new(last_stream, logger);
  gt_queue_add(streams, stream);
  last_stream = stream;

  if(options.source != NULL)
  {
    GtNodeVisitor *ssv = gt_set_source_visitor_new(options.source);
    stream = gt_visitor_stream_new(last_stream, ssv);
    gt_queue_add(streams, stream);
    last_stream = stream;
  }

  stream = gt_gff3_out_stream_new(last_stream, options.outstream);
  if(!options.infer)
    gt_gff3_out_stream_retain_id_attributes((GtGFF3OutStream *)stream);
  gt_queue_add(streams, stream);
  last_stream = stream;

  if(gt_node_stream_pull(last_stream, error) == -1)
  {
    fprintf(stderr, "[CanonGFF3] error processing node stream: %s",
            gt_error_get(error));
  }

  while(gt_queue_size(streams) > 0)
  {
    stream = gt_queue_get(streams);
    gt_node_stream_delete(stream);
  }
  gt_queue_delete(streams);
  if(options.source != NULL)
    gt_str_delete(options.source);
  if(options.outstream != NULL)
    gt_file_delete(options.outstream);
  gt_error_delete(error);
  gt_logger_delete(logger);
  gt_lib_clean();

  return 0;
}
Exemple #28
0
unsigned long gt_select_visitor_node_buffer_size(GtNodeVisitor *nv)
{
  GtSelectVisitor *select_visitor = select_visitor_cast(nv);
  return gt_queue_size(select_visitor->node_buffer);
}
GtNodeVisitor*
agn_gaeval_visitor_new(GtNodeStream *astream, AgnGaevalParams gparams)
{
  agn_assert(astream);

  // Create the node visitor
  GtNodeVisitor *nv = gt_node_visitor_create(gaeval_visitor_class());
  AgnGaevalVisitor *v = gaeval_visitor_cast(nv);
  v->alignments = gt_feature_index_memory_new();
  v->tsvout = NULL;
  v->params = gparams;

  // Check that sum of weights is 1.0
  double weights_total = gparams.alpha + gparams.beta +
                         gparams.gamma + gparams.epsilon;
  if(fabs(weights_total - 1.0) > 0.0001)
  {
    fprintf(stderr, "[AgnGaevalVisitor::agn_gaeval_visitor_new] warning: "
            "sum of weights is not 1.0 %.3lf; integrity calculations will be "
            "incorrect\n", weights_total);
  }


  // Set up node stream to load alignment features into memory
  GtQueue *streams = gt_queue_new();
  GtNodeStream *stream, *last_stream;
  GtHashmap *typestokeep = gt_hashmap_new(GT_HASH_STRING, NULL, NULL);
  gt_hashmap_add(typestokeep, "cDNA_match", "cDNA_match");
  gt_hashmap_add(typestokeep, "EST_match", "EST_match");
  gt_hashmap_add(typestokeep, "nucleotide_match", "nucleotide_match");
  stream = agn_filter_stream_new(astream, typestokeep);
  gt_queue_add(streams, stream);
  last_stream = stream;

  stream = gt_feature_out_stream_new(last_stream, v->alignments);
  gt_queue_add(streams, stream);
  last_stream = stream;

  stream = gt_inter_feature_stream_new(last_stream, "cDNA_match", "match_gap");
  gt_queue_add(streams, stream);
  last_stream = stream;

  stream = gt_inter_feature_stream_new(last_stream, "EST_match", "match_gap");
  gt_queue_add(streams, stream);
  last_stream = stream;

  stream = gt_inter_feature_stream_new(last_stream, "nucleotide_match",
                                       "match_gap");
  gt_queue_add(streams, stream);
  last_stream = stream;

  // Process the node stream
  GtError *error = gt_error_new();
  int result = gt_node_stream_pull(last_stream, error);
  if(result == -1)
  {
    fprintf(stderr, "[AEGeAn::AgnGaevalStream] error parsing alignments: %s\n",
            gt_error_get(error));
    gt_node_visitor_delete(nv);
    return NULL;
  }
  gt_error_delete(error);
  gt_hashmap_delete(typestokeep);
  while(gt_queue_size(streams) > 0)
  {
    stream = gt_queue_get(streams);
    gt_node_stream_delete(stream);
  }
  gt_queue_delete(streams);

  return nv;
}
GtUword gt_add_ids_visitor_node_buffer_size(GtNodeVisitor *nv)
{
  GtAddIDsVisitor *add_ids_visitor = add_ids_visitor_cast(nv);
  return gt_queue_size(add_ids_visitor->node_buffer);
}