Beispiel #1
0
unsigned long gt_layout_get_height(const GtLayout *layout)
{
  GtTracklineInfo lines;
  double tmp, head_track_space = HEAD_TRACK_SPACE_DEFAULT;
  bool show_track_captions;
  unsigned long height,
                line_height,
                i;
  gt_assert(layout);

  /* get dynamic heights from tracks */
  lines.style = layout->style; lines.height = 0;
  (void) gt_hashmap_foreach(layout->tracks, add_tracklines,
                            &lines, NULL);
  height = lines.height;

  /* obtain line height and spacer from style */
  if (gt_style_get_num(layout->style, "format", "bar_height", &tmp, NULL))
    line_height = tmp;
  else
    line_height = BAR_HEIGHT_DEFAULT;
  if (gt_style_get_num(layout->style, "format", "bar_vspace", &tmp, NULL))
    line_height += tmp;
  else
    line_height += BAR_VSPACE_DEFAULT;

  if (!(gt_style_get_bool(layout->style, "format","show_track_captions",
                          &show_track_captions, NULL)))
    show_track_captions = true;

  /* add custom track space allotment */
  if (show_track_captions)
  {
    double theight = TOY_TEXT_HEIGHT,
           captionspace = CAPTION_BAR_SPACE_DEFAULT;
    (void) gt_style_get_num(layout->style, "format", "track_caption_font_size",
                            &theight, NULL);
    (void) gt_style_get_num(layout->style, "format", "track_caption_space",
                            &captionspace, NULL);
    height += gt_array_size(layout->custom_tracks)
                  * (theight + captionspace);
  }

  for (i=0;i<gt_array_size(layout->custom_tracks);i++)
  {
    GtCustomTrack *ct = *(GtCustomTrack**) gt_array_get(layout->custom_tracks,
                                                        i);
    height += gt_custom_track_get_height(ct);
    (void) gt_style_get_num(layout->style, "format", "track_vspace", &tmp,
                            NULL);
    height += tmp;

  }

  /* add header space and footer */
  (void) gt_style_get_num(layout->style, "format", "ruler_space",
                          &head_track_space, NULL);
  height += HEADER_SPACE + head_track_space + FOOTER_SPACE;
  return height;
}
/* If the visitor holds more than one CDS segment, ties them together as one
   multi-feature: the first segment becomes the multi representative (it gets
   a generated "CDS<n>" ID if it has none) and every segment that is not yet
   part of a multi-feature is pointed at that representative. */
static void infer_cds_visitor_check_cds_multi(AgnInferCDSVisitor *v)
{
  if(gt_array_size(v->cds) <= 1)
  {
    return;
  }

  GtFeatureNode **firstsegment = gt_array_get(v->cds, 0);
  const char *id = gt_feature_node_get_attribute(*firstsegment, "ID");
  if(id == NULL)
  {
    char newid[64];
    /* bounded formatting: never write past the end of newid */
    (void) snprintf(newid, sizeof newid, "CDS%lu", v->cdscounter++);
    gt_feature_node_add_attribute(*firstsegment, "ID", newid);
  }
  gt_feature_node_make_multi_representative(*firstsegment);

  GtUword i;
  for(i = 0; i < gt_array_size(v->cds); i++)
  {
    GtFeatureNode **segment = gt_array_get(v->cds, i);
    /* segments already flagged as multi (including the representative
       itself) are left untouched */
    if(!gt_feature_node_is_multi(*segment))
    {
      gt_feature_node_set_multi_representative(*segment, *firstsegment);
    }
  }
}
Beispiel #3
0
/* Fills the (empty) array <outranges> with the ranges of <inranges> mirrored
   onto the opposite strand. The input is walked back to front, so the output
   order is the reverse of the input order. <gen_offset> must be defined. */
void gt_ranges_copy_to_opposite_strand(GtArray *outranges,
                                       const GtArray *inranges,
                                       GtUword gen_total_length,
                                       GtUword gen_offset)
{
  GtUword remaining, mirror_base;
  GtRange mirrored;
  const GtRange *src;

  /* outranges are empty */
  gt_assert(!gt_array_size(outranges));
  /* inranges are not empty */
  gt_assert(gt_array_size(inranges));
  /* genomic offset is defined */
  gt_assert(gen_offset != GT_UNDEF_UWORD);

  /* pos' = gen_total_length - 1 - (pos - gen_offset) + gen_offset
          = mirror_base - pos  (unsigned arithmetic, so regrouping is exact) */
  mirror_base = gen_total_length - 1 + gen_offset + gen_offset;

  remaining = gt_array_size(inranges);
  while (remaining > 0) {
    remaining--;
    src = gt_array_get(inranges, remaining);
    /* start and end swap roles when switching strands */
    mirrored.start = mirror_base - src->end;
    mirrored.end   = mirror_base - src->start;
    gt_array_add(outranges, mirrored);
  }

  /* outranges has the same number of elements as inranges */
  gt_assert(gt_array_size(inranges) == gt_array_size(outranges));
}
Beispiel #4
0
/* Recursively walks the is_a out-edges starting at <node>. <node_stack> holds
   the nodes on the current recursion path; for every column set in row
   <node>->num of <part_of_in_edges>, an edge between that column and each
   node on the stack is recorded in both matrices. */
static void create_transitive_part_of_edges(GtTypeNode *node,
                                            GtBoolMatrix *part_of_out_edges,
                                            GtBoolMatrix *part_of_in_edges,
                                            GtArray *node_stack)
{
  unsigned long col, k, edge;
  const unsigned long depth = gt_array_size(node_stack);

  if (depth > 0) {
    col = gt_bool_matrix_get_first_column(part_of_in_edges, node->num);
    /* the end marker is re-queried on every pass because the loop body
       writes into part_of_in_edges */
    while (col != gt_bool_matrix_get_last_column(part_of_in_edges,
                                                 node->num)) {
      for (k = 0; k < depth; k++) {
        GtTypeNode **on_path = gt_array_get(node_stack, k);
        gt_bool_matrix_set(part_of_out_edges, col, (*on_path)->num, true);
        gt_bool_matrix_set(part_of_in_edges, (*on_path)->num, col, true);
      }
      col = gt_bool_matrix_get_next_column(part_of_in_edges, node->num, col);
    }
  }

  /* descend along the is_a edges with <node> on the path */
  gt_array_add(node_stack, node);
  for (edge = 0; edge < gt_array_size(node->is_a_out_edges); edge++) {
    GtTypeNode **target = gt_array_get(node->is_a_out_edges, edge);
    create_transitive_part_of_edges(*target, part_of_out_edges,
                                    part_of_in_edges, node_stack);
  }
  gt_array_pop(node_stack);
}
Beispiel #5
0
/* Returns the fraction of <introns> whose range compares equal (via
   gt_range_compare) to the range of at least one node in <gaps>.
   <introns> must not be empty; an empty <gaps> array yields 0.0. */
static double gaeval_visitor_introns_confirmed(GtArray *introns, GtArray *gaps)
{
  agn_assert(introns && gaps);
  GtUword num_introns = gt_array_size(introns);
  GtUword num_gaps = gt_array_size(gaps);
  agn_assert(num_introns > 0);

  if(num_gaps == 0)
    return 0.0;

  GtUword matched = 0;
  GtUword intron_idx;
  for(intron_idx = 0; intron_idx < num_introns; intron_idx++)
  {
    GtGenomeNode **intron = gt_array_get(introns, intron_idx);
    GtRange intron_range = gt_genome_node_get_range(*intron);

    /* stop scanning the gaps as soon as one matches this intron */
    bool confirmed = false;
    GtUword gap_idx = 0;
    while(!confirmed && gap_idx < num_gaps)
    {
      GtGenomeNode **gap = gt_array_get(gaps, gap_idx);
      GtRange gap_range = gt_genome_node_get_range(*gap);
      confirmed = (gt_range_compare(&intron_range, &gap_range) == 0);
      gap_idx++;
    }

    if(confirmed)
      matched++;
  }

  return (double)matched / (double)num_introns;
}
Beispiel #6
0
/* Fills the (empty) <inverted_chain> from the non-empty <chain>: file and
   sequence numbers are copied, start/end positions are taken from the first
   and last forward range, and every space between two consecutive forward
   ranges is stored as a forward range of the inverted chain. */
static void convert_chain_to_inverted_chain(GthInvertedChain *inverted_chain,
                                            GthChain *chain)
{
  const unsigned long lastexonnum = gt_array_size(chain->forwardranges) - 1;
  unsigned long exon;
  const GtRange *first, *last, *upstream, *downstream;
  GtRange between;

  /* inverted chain is empty */
  gt_assert(!gt_array_size(inverted_chain->forwardranges));
  /* chain is not empty */
  gt_assert(gt_array_size(chain->forwardranges));

  /* copy file and sequence numbers */
  inverted_chain->gen_file_num = chain->gen_file_num;
  inverted_chain->gen_seq_num  = chain->gen_seq_num;
  inverted_chain->ref_file_num = chain->ref_file_num;
  inverted_chain->ref_seq_num  = chain->ref_seq_num;

  /* remember the outer boundaries of the chain */
  first = gt_array_get_first(chain->forwardranges);
  inverted_chain->startpos = first->start;
  last = gt_array_get_last(chain->forwardranges);
  inverted_chain->endpos = last->end;

  /* convert (potential) exons to (potential) introns */
  for (exon = 0; exon < lastexonnum; exon++) {
    upstream   = gt_array_get(chain->forwardranges, exon);
    downstream = gt_array_get(chain->forwardranges, exon + 1);
    between.start = upstream->end + 1;
    between.end   = downstream->start - 1;
    gt_array_add(inverted_chain->forwardranges, between);
  }
}
Beispiel #7
0
/* Returns true for every exon index of <gene> except the last one.
   <exon> must be a valid index into <gene>. */
static bool has_donor_site(GtArray *gene, unsigned long exon)
{
  unsigned long last_exon;
  gt_assert(exon < gt_array_size(gene));
  last_exon = gt_array_size(gene) - 1;
  return exon != last_exon;
}
/* Callback needed for calling consensus_sa(): builds one AGS for the splice
   form given by <spliced_alignments_in_form> (an array of GtUword indices)
   and appends it to the assemblies of the PGL passed in <userdata>. */
static void process_splice_form_func(GtArray *spliced_alignments_in_form,
                                     GT_UNUSED const void *set_of_sas,
                                     GT_UNUSED GtUword number_of_sas,
                                     GT_UNUSED size_t size_of_sa,
                                     void *userdata)
{
  GthPGL *pgl = (GthPGL*) userdata;
  GthAGS *ags = gth_ags_new(pgl);
  GtBittab *in_assembly = gt_bittab_new(gt_array_size(pgl->saclusters));
  GtUword k;

  /* flag every index that takes part in this splice form */
  for (k = 0; k < gt_array_size(spliced_alignments_in_form); k++) {
    const GtUword *member = gt_array_get(spliced_alignments_in_form, k);
    gt_bittab_set_bit(in_assembly, *member);
  }

  gth_build_AGS_from_assembly(ags, in_assembly, pgl->saclusters);
  gt_bittab_delete(in_assembly);

  /* the new AGS is stored in the PGL */
  gt_array_add(pgl->assemblies, ags);
}
Beispiel #9
0
/* Returns a new array of GtRange holding the union of the ranges in <cov1>
   and <cov2>, with overlapping ranges joined. Note that <cov1> is modified:
   the elements of <cov2> are appended to it and it is sorted. */
static GtArray *gaeval_visitor_union(GtArray *cov1, GtArray *cov2)
{
  agn_assert(cov1 && cov2);

  gt_array_add_array(cov1, cov2);
  GtUword total = gt_array_size(cov1);
  if(total > 1)
    gt_array_sort(cov1, (GtCompare)gt_range_compare);

  GtArray *merged = gt_array_new(sizeof(GtRange));
  GtRange *tail = NULL; /* most recently stored range of <merged> */
  GtUword k;
  for(k = 0; k < total; k++)
  {
    GtRange *current = gt_array_get(cov1, k);
    if(tail != NULL && gt_range_overlap(current, tail))
    {
      /* grow the stored range in place */
      *tail = gt_range_join(current, tail);
      continue;
    }

    gt_array_add(merged, *current);
    /* re-fetch: adding an element may have moved the array's storage */
    tail = gt_array_get(merged, gt_array_size(merged) - 1);
  }

  return merged;
}
Beispiel #10
0
/* Node stream callback of the sort stream. On the first call all nodes of
   the input stream are read (EOF nodes are dropped), stored and sorted
   stably. Each call then hands out the next stored node in <*gn>; consecutive
   region nodes with the same sequence ID are joined into one. When all nodes
   have been delivered, <*gn> is set to NULL. Returns non-zero if reading the
   input stream failed. */
static int gt_sort_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                               GtError *err)
{
  GtSortStream *stream;
  GtGenomeNode *node, *eofn;
  GtRange joined, other;
  int had_err;
  gt_error_check(err);
  stream = gt_sort_stream_cast(ns);

  /* first call: slurp and sort the complete input */
  if (!stream->sorted) {
    for (;;) {
      had_err = gt_node_stream_next(stream->in_stream, &node, err);
      if (had_err || !node)
        break;
      eofn = gt_eof_node_try_cast(node);
      if (eofn)
        gt_genome_node_delete(eofn); /* get rid of EOF nodes */
      else
        gt_array_add(stream->nodes, node);
    }
    if (had_err)
      return had_err;
    gt_genome_nodes_sort_stable(stream->nodes);
    stream->sorted = true;
  }
  gt_assert(stream->sorted);

  /* everything delivered: release the buffer and signal end of stream */
  if (stream->idx >= gt_array_size(stream->nodes)) {
    gt_array_reset(stream->nodes);
    *gn = NULL;
    return 0;
  }

  *gn = *(GtGenomeNode**) gt_array_get(stream->nodes, stream->idx);
  stream->idx++;
  if (!gt_region_node_try_cast(*gn))
    return 0;

  /* join region nodes with the same sequence ID */
  while (stream->idx < gt_array_size(stream->nodes)) {
    node = *(GtGenomeNode**) gt_array_get(stream->nodes, stream->idx);
    if (!gt_region_node_try_cast(node))
      break; /* the next node is not a region node */
    if (gt_str_cmp(gt_genome_node_get_seqid(*gn),
                   gt_genome_node_get_seqid(node)))
      break; /* the next region node has a different sequence ID */
    joined = gt_genome_node_get_range(*gn);
    other  = gt_genome_node_get_range(node);
    joined = gt_range_join(&joined, &other);
    gt_genome_node_set_range(*gn, &joined);
    gt_genome_node_delete(node);
    stream->idx++;
  }
  return 0;
}
/* Reads all nodes of the GFF3 file <gff3file> and, if reading succeeded,
   lets a feature visitor add them to <feature_index>. All nodes read are
   deleted afterwards. Returns non-zero (with <err> set by the stream) if
   reading failed. */
int gt_feature_index_add_gff3file(GtFeatureIndex *feature_index,
                                  const char *gff3file, GtError *err)
{
  GtNodeStream *gff3_in_stream;
  GtNodeVisitor *feature_visitor;
  GtGenomeNode *gn;
  GtArray *nodes;
  GtUword k;
  int had_err;
  gt_error_check(err);
  gt_assert(feature_index && gff3file);

  /* buffer the complete file first, so nothing is added on a parse error */
  nodes = gt_array_new(sizeof (GtGenomeNode*));
  gff3_in_stream = gt_gff3_in_stream_new_unsorted(1, &gff3file);
  for (;;) {
    had_err = gt_node_stream_next(gff3_in_stream, &gn, err);
    if (had_err || !gn)
      break;
    gt_array_add(nodes, gn);
  }

  if (!had_err) {
    feature_visitor = gt_feature_visitor_new(feature_index);
    for (k = 0; k < gt_array_size(nodes); k++) {
      gn = *(GtGenomeNode**) gt_array_get(nodes, k);
      /* no need to lock, add_*_node() is synchronized */
      had_err = gt_genome_node_accept(gn, feature_visitor, NULL);
      gt_assert(!had_err); /* cannot happen */
    }
    gt_node_visitor_delete(feature_visitor);
  }

  /* clean up the stream and the buffered nodes */
  gt_node_stream_delete(gff3_in_stream);
  for (k = 0; k < gt_array_size(nodes); k++) {
    gn = *(GtGenomeNode**) gt_array_get(nodes, k);
    gt_genome_node_delete(gn);
  }
  gt_array_delete(nodes);
  return had_err;
}
/* Releases everything held by the SNP annotator stream <ns>: the region
   mapping, all nodes still waiting in the two queues, the input streams, the
   current gene set, the merge stream and the containers themselves.
   A NULL <ns> is ignored. */
static void snp_annotator_stream_free(GtNodeStream *ns)
{
  GtSNPAnnotatorStream *sas;
  GtGenomeNode *pending;
  GtUword k;

  if (ns == NULL)
    return;
  sas = gt_snp_annotator_stream_cast(ns);

  gt_region_mapping_delete(sas->rmap);

  /* drain both queues, deleting the nodes they still hold */
  while (gt_queue_size(sas->snps) > 0) {
    pending = (GtGenomeNode*) gt_queue_get(sas->snps);
    gt_genome_node_delete(pending);
  }
  while (gt_queue_size(sas->outqueue) > 0) {
    pending = (GtGenomeNode*) gt_queue_get(sas->outqueue);
    gt_genome_node_delete(pending);
  }

  /* delete the array contents before the arrays themselves */
  for (k = 0; k < gt_array_size(sas->instreams); k++) {
    GtNodeStream **instream = gt_array_get(sas->instreams, k);
    gt_node_stream_delete(*instream);
  }
  for (k = 0; k < gt_array_size(sas->cur_gene_set); k++) {
    GtGenomeNode **gene = gt_array_get(sas->cur_gene_set, k);
    gt_genome_node_delete(*gene);
  }

  gt_array_delete(sas->cur_gene_set);
  gt_node_stream_delete(sas->merge_stream);
  gt_array_delete(sas->instreams);
  gt_queue_delete(sas->snps);
  gt_queue_delete(sas->outqueue);
}
/* Attaches <singlehit> as a protein_match child feature to the LTR
   retrotransposon currently handled by <lv>. A feature is only created if
   the hit belongs to at least one chain or if <lv> is set to output all
   chains; in that case <modelhit> must not be NULL. The feature carries the
   alignment and amino acid sequence as user data, the reading frame, the
   model name (if set) and, when all chains are output and the hit is part of
   more than one, a "chains" attribute of the form "name:chain,name:chain".
   The chains array of <singlehit> is always deleted and reset to NULL.
   Always returns 0. */
static int gt_ltrdigest_pdom_visitor_attach_hit(GtLTRdigestPdomVisitor *lv,
                                                GtHMMERModelHit *modelhit,
                                                GtHMMERSingleHit *singlehit)
{
  GtGenomeNode *gf;
  int had_err = 0;
  GtRange rrng;
  gt_assert(lv && singlehit);

  rrng = gt_ltrdigest_pdom_visitor_coords(lv, singlehit);

  if (gt_array_size(singlehit->chains) > 0 || lv->output_all_chains) {
    char buf[32];
    /* the model hit is dereferenced below */
    gt_assert(modelhit);
    gf = gt_feature_node_new(gt_genome_node_get_seqid((GtGenomeNode*)
                                                      lv->ltr_retrotrans),
                             gt_ft_protein_match,
                             rrng.start,
                             rrng.end,
                             singlehit->strand);
    /* the feature keeps its own references to alignment and sequence */
    gt_genome_node_add_user_data((GtGenomeNode*) gf, "pdom_alignment",
                                 gt_str_ref(singlehit->alignment),
                                 (GtFree) gt_str_delete);
    gt_genome_node_add_user_data((GtGenomeNode*) gf, "pdom_aaseq",
                                 gt_str_ref(singlehit->aastring),
                                 (GtFree) gt_str_delete);
    gt_feature_node_set_source((GtFeatureNode*) gf, lv->tag);
    gt_feature_node_set_score((GtFeatureNode*) gf, (float) singlehit->evalue);
    (void) snprintf(buf, sizeof buf, "%d", (int) singlehit->frame);
    gt_feature_node_add_attribute((GtFeatureNode*) gf,
                                    "reading_frame", buf);
    if (modelhit->modelname != NULL) {
      gt_feature_node_add_attribute((GtFeatureNode*) gf, "name",
                                    modelhit->modelname);
    }
    if (gt_array_size(singlehit->chains) > 1UL && lv->output_all_chains) {
      GtStr *buffer;
      GtUword j, num_of_chains = gt_array_size(singlehit->chains);
      buffer = gt_str_new();
      for (j = 0UL; j < num_of_chains; j++) {
        /* the model name is optional (see above), so never pass NULL on */
        if (modelhit->modelname != NULL)
          gt_str_append_cstr(buffer, modelhit->modelname);
        gt_str_append_char(buffer, ':');
        gt_str_append_ulong(buffer,
                          *(GtUword*) gt_array_get(singlehit->chains, j));
        if (j != num_of_chains - 1) {
          gt_str_append_char(buffer, ',');
        }
      }
      gt_feature_node_set_attribute((GtFeatureNode*) gf, "chains",
                                    gt_str_get(buffer));
      gt_str_delete(buffer);
    }
    gt_feature_node_add_child(lv->ltr_retrotrans, (GtFeatureNode*) gf);
  }
  gt_array_delete(singlehit->chains);
  singlehit->chains = NULL;
  return had_err;
}
/* Records, for the model hit registered under the current model name, how
   many forward and reverse hits it holds right now. Does nothing if no hit
   is registered for the current model. */
static void gt_hmmer_parse_status_mark_frame_finished(GtHMMERParseStatus *s)
{
  GtHMMERModelHit *hit;
  gt_assert(s && s->models);

  hit = gt_hashmap_get(s->models, gt_str_get(s->cur_model));
  if (hit == NULL)
    return;

  hit->last_array_size_fwd = gt_array_size(hit->fwd_hits);
  hit->last_array_size_rev = gt_array_size(hit->rev_hits);
}
Beispiel #15
0
/* Computes the parts of the exons of <genemodel> (an mRNA feature) that are
   covered by <alignment>. Every feature reached by iterating over the
   alignment, except those of type "match_gap", is intersected with every
   exon; non-empty intersections are collected.
   Returns a new array of GtRange, or NULL if gene model and alignment lie on
   different strands. The collected ranges are asserted to be pairwise
   non-overlapping. */
static GtArray*
gaeval_visitor_intersect(GtGenomeNode *genemodel, GtGenomeNode *alignment)
{
  agn_assert(genemodel && alignment);

  GtFeatureNode *genefn = gt_feature_node_cast(genemodel);
  GtFeatureNode *algnfn = gt_feature_node_cast(alignment);
  agn_assert(gt_feature_node_has_type(genefn, "mRNA"));
  GtStrand genestrand = gt_feature_node_get_strand(genefn);
  GtStrand algnstrand = gt_feature_node_get_strand(algnfn);
  if(genestrand != algnstrand)
    return NULL;

  GtArray *covered_parts = gt_array_new( sizeof(GtRange) );
  GtArray *exons = agn_typecheck_select(genefn, agn_typecheck_exon);
  /* unsigned index: it is compared against (unsigned) array sizes */
  GtUword i;
  for(i = 0; i < gt_array_size(exons); i++)
  {
    GtGenomeNode *exon = *(GtGenomeNode **)gt_array_get(exons, i);
    GtRange exonrange = gt_genome_node_get_range(exon);

    GtFeatureNodeIterator *aniter = gt_feature_node_iterator_new(algnfn);
    GtFeatureNode *tempaln;
    /* {0, 0} is treated as "no intersection" */
    GtRange nullrange = {0, 0};
    for(tempaln  = gt_feature_node_iterator_next(aniter);
        tempaln != NULL;
        tempaln  = gt_feature_node_iterator_next(aniter))
    {
      if(gt_feature_node_has_type(tempaln, "match_gap"))
        continue;

      GtRange alnrange = gt_genome_node_get_range((GtGenomeNode *) tempaln);
      GtRange intr = gaeval_visitor_range_intersect(&exonrange, &alnrange);
      if(gt_range_compare(&intr, &nullrange) != 0)
        gt_array_add(covered_parts, intr);
    }
    gt_feature_node_iterator_delete(aniter);
  }
  gt_array_delete(exons);

  /* sanity check: no two collected ranges may overlap */
  for(i = 0; i < gt_array_size(covered_parts); i++)
  {
    GtRange *r1 = gt_array_get(covered_parts, i);
    GtUword j;
    for(j = i+1; j < gt_array_size(covered_parts); j++)
    {
      GtRange *r2 = gt_array_get(covered_parts, j);
      agn_assert(gt_range_overlap(r1, r2) == false);
    }
  }

  return covered_parts;
}
/* Frees the model hit <mh> together with all single hits stored in its
   forward and reverse hit arrays. A NULL <mh> is ignored. */
static void gt_hmmer_model_hit_delete(GtHMMERModelHit *mh)
{
  unsigned long k;
  GtHMMERSingleHit **hit;

  if (mh == NULL)
    return;

  /* forward strand hits */
  for (k = 0; k < gt_array_size(mh->fwd_hits); k++) {
    hit = gt_array_get(mh->fwd_hits, k);
    gt_free(*hit);
  }
  gt_array_delete(mh->fwd_hits);

  /* reverse strand hits */
  for (k = 0; k < gt_array_size(mh->rev_hits); k++) {
    hit = gt_array_get(mh->rev_hits, k);
    gt_free(*hit);
  }
  gt_array_delete(mh->rev_hits);

  gt_free(mh);
}
Beispiel #17
0
/* Writes the exon structure of <transcript> to <outstream> as a location
   string: a single exon is printed as "<start..>end", several exons as
   "join(<s1..e1,s2..e2,...,sn..>en)"; for reverse-strand transcripts the
   whole expression is wrapped in "complement(...)". The transcript must
   have at least one direct exon child. */
void agn_transcript_structure_gbk(GtFeatureNode *transcript, FILE *outstream)
{
  gt_assert(transcript && outstream);

  /* collect the direct exon children */
  GtArray *exons = gt_array_new( sizeof(GtFeatureNode *) );
  GtFeatureNodeIterator *iter = gt_feature_node_iterator_new_direct(transcript);
  GtFeatureNode *child = gt_feature_node_iterator_next(iter);
  while(child != NULL)
  {
    if(agn_gt_feature_node_is_exon_feature(child))
      gt_array_add(exons, child);
    child = gt_feature_node_iterator_next(iter);
  }
  gt_feature_node_iterator_delete(iter);

  gt_assert(gt_array_size(exons) > 0);
  gt_array_sort(exons, (GtCompare)agn_gt_genome_node_compare);

  const bool on_reverse =
    gt_feature_node_get_strand(transcript) == GT_STRAND_REVERSE;
  const GtUword num_exons = gt_array_size(exons);

  if(on_reverse)
    fputs("complement(", outstream);

  if(num_exons == 1)
  {
    GtGenomeNode **only = gt_array_get(exons, 0);
    GtRange span = gt_genome_node_get_range(*only);
    fprintf(outstream, "<%lu..>%lu", span.start, span.end);
  }
  else
  {
    GtUword k;
    fputs("join(", outstream);
    for(k = 0; k < num_exons; k++)
    {
      GtGenomeNode **exon = gt_array_get(exons, k);
      GtRange span = gt_genome_node_get_range(*exon);

      /* only the outermost coordinates carry the '<' / '>' markers */
      if(k == 0)
        fprintf(outstream, "<%lu..%lu", span.start, span.end);
      else if(k + 1 == num_exons)
        fprintf(outstream, ",%lu..>%lu", span.start, span.end);
      else
        fprintf(outstream, ",%lu..%lu", span.start, span.end);
    }
    fputs(")", outstream);
  }

  if(on_reverse)
    fputs(")", outstream);
}
Beispiel #18
0
/* Sorts the non-empty array <matches> with compare_matches() and partitions
   it into buckets of consecutive matches sharing the same reference and
   genomic sequence number. One Bucket (sequence numbers, start index,
   length) per run is appended to <buckets>; the length of the longest bucket
   is stored in <maxbucketlength>. */
static void sort_matches_and_calc_buckets(GtArray *matches, GtArray *buckets,
                                          GtUword *maxbucketlength)
{
  GtUword idx, bucketstart = 0;
  GthMatch *match;
  Bucket current, *stored;

  gt_assert(gt_array_size(matches));

  /* sort matches */
  qsort(gt_array_get_space(matches), gt_array_size(matches), sizeof (GthMatch),
        compare_matches);

  /* the first match opens the first bucket */
  match = gt_array_get_first(matches);
  current.seqnum1  = match->Storeseqnumreference;
  current.seqnum2  = match->Storeseqnumgenomic;
  current.startpos = 0;

  /* every change of a sequence number closes the open bucket */
  for (idx = 1; idx < gt_array_size(matches); idx++) {
    match = gt_array_get(matches, idx);
    if (match->Storeseqnumreference == current.seqnum1 &&
        match->Storeseqnumgenomic == current.seqnum2) {
      continue; /* still inside the open bucket */
    }

    /* the open bucket covers [bucketstart, idx - 1] */
    current.length = idx - bucketstart;
    gt_array_add(buckets, current);

    bucketstart      = idx;
    current.seqnum1  = match->Storeseqnumreference;
    current.seqnum2  = match->Storeseqnumgenomic;
    current.startpos = idx;
  }

  /* close the bucket that is still open */
  current.length = idx - bucketstart;
  gt_array_add(buckets, current);

  /* compute maximum bucket length */
  *maxbucketlength = 0;
  for (idx = 0; idx < gt_array_size(buckets); idx++) {
    stored = gt_array_get(buckets, idx);
    if (*maxbucketlength < stored->length)
      *maxbucketlength = stored->length;
  }

  gt_assert(sum_of_bucket_lengths_equals_num_of_matches(buckets,
                                                     gt_array_size(matches)));
}
Beispiel #19
0
bool gt_ranges_do_not_overlap(const GtArray *ranges)
{
  GtUword i;

  gt_assert(ranges && gt_array_size(ranges));

  for (i = 1; i < gt_array_size(ranges); i++) {
    if (gt_range_overlap(gt_array_get(ranges, i-1), gt_array_get(ranges, i)))
      return false;
  }
  return true;
}
Beispiel #20
0
/* Returns true if every result recorded for <test> was successful.
   At least one result must have been recorded. */
bool agn_unit_test_success(AgnUnitTest *test)
{
    agn_assert(gt_array_size(test->results) > 0);

    GtUword k = 0;
    while(k < gt_array_size(test->results))
    {
        UnitTestResult *outcome = gt_array_get(test->results, k);
        if(outcome->success == false)
            return false; /* a single failure fails the whole test */
        k++;
    }
    return true;
}
/* Checks the stop codon of the current mRNA against its CDS. The expected
   stop codon is the last 3 positions of the last CDS segment, or the first
   3 positions of the first segment for reverse-strand mRNAs.
   - More than one explicit stop codon: a message is logged.
   - Exactly one: a message is logged if it differs from the expected range.
   - None: a "stop_codon" feature with the expected range is created, added
     as a child of the mRNA and stored in the visitor's stop codon array.
   Does nothing if the mRNA has no CDS. */
static void infer_cds_visitor_check_stop(AgnInferCDSVisitor *v)
{
  if(gt_array_size(v->cds) == 0)
    return;

  const char *mrnaid = gt_feature_node_get_attribute(v->mrna, "ID");
  unsigned int ln = gt_genome_node_get_line_number((GtGenomeNode *)v->mrna);
  GtStrand strand = gt_feature_node_get_strand(v->mrna);

  /* derive the expected stop codon range from the 3'-most CDS segment */
  GtRange stoprange;
  GtUword threeprimeindex = gt_array_size(v->cds) - 1;
  GtGenomeNode **threeprimesegment = gt_array_get(v->cds, threeprimeindex);
  stoprange = gt_genome_node_get_range(*threeprimesegment);
  stoprange.start = stoprange.end - 2;
  if(strand == GT_STRAND_REVERSE)
  {
    threeprimesegment = gt_array_get(v->cds, 0);
    stoprange = gt_genome_node_get_range(*threeprimesegment);
    stoprange.end = stoprange.start + 2;
  }

  if(gt_array_size(v->stops) > 1)
  {
    /* report the number of stop codons (not of start codons) */
    gt_logger_log(v->logger, "mRNA '%s' (line %u) has %lu stop codons", mrnaid,
                  ln, gt_array_size(v->stops));
  }
  else if(gt_array_size(v->stops) == 1)
  {
    GtGenomeNode **codon = gt_array_get(v->stops, 0);
    GtRange testrange = gt_genome_node_get_range(*codon);
    if(gt_range_compare(&stoprange, &testrange) != 0)
    {
      gt_logger_log(v->logger, "stop codon inferred from CDS [%lu, %lu] does "
                    "not match explicitly provided stop codon [%lu, %lu] for "
                    "mRNA '%s'", stoprange.start, stoprange.end,
                    testrange.start, testrange.end, mrnaid);
    }
  }
  else /* no explicit stop codon: create the inferred one */
  {
    GtStr *seqid = gt_genome_node_get_seqid((GtGenomeNode *)v->mrna);
    GtGenomeNode *codonfeature = gt_feature_node_new(seqid, "stop_codon",
                                                     stoprange.start,
                                                     stoprange.end,
                                                     strand);
    if(v->source)
      gt_feature_node_set_source((GtFeatureNode *)codonfeature, v->source);
    GtFeatureNode *cf = (GtFeatureNode *)codonfeature;
    gt_feature_node_add_child(v->mrna, cf);
    gt_array_add(v->stops, cf);
  }
}
Beispiel #22
0
/* Finds a line of <track> that can take <block> and stores it in <*result>.
   Existing lines are tried in order; if all are occupied a new line is
   created, unless
   - the track's line limit has been reached: the block is counted as
     discarded, <*result> is set to NULL and 0 is returned, or
   - the track does not split lines: the single line of the track is used
     (and created if the track has no line yet).
   Returns non-zero if the line breaker reported an error; <*result> then
   holds the line that was being checked. */
static int get_next_free_line(GtTrack *track, GtLine **result, GtBlock *block,
                              GtError *err)
{
    unsigned long i;
    GtLine* line;
    int had_err = 0;
    bool is_occupied;
    gt_assert(track);

    /* find unoccupied line -- may need optimisation */
    for (i = 0; i < gt_array_size(track->lines); i++) {
        line = *(GtLine**) gt_array_get(track->lines, i);
        had_err = gt_line_breaker_line_is_occupied(track->lb, &is_occupied, line,
                  block, err);
        if (had_err)
            break;
        if (!is_occupied) {
            *result = line;
            return 0;
        }
    }
    /* all lines are occupied, we need to create a new one */
    if (!had_err) {
        /* if line limit is hit, do not create any more lines! */
        if (track->max_num_lines != GT_UNDEF_ULONG
                && gt_array_size(track->lines) == track->max_num_lines)
        {
            track->discarded_blocks++;
            *result = NULL;
            /* stop here: falling through would create a line after all and
               overwrite the NULL result */
            return 0;
        }
        /* make sure there is only one line if 'split_lines' is set to false */
        if (!track->split)
        {
            if (gt_array_size(track->lines) < 1)
            {
                line = gt_line_new();
                gt_array_add(track->lines, line);
            }
            else
                line = *(GtLine**) gt_array_get(track->lines, 0);
            gt_assert(gt_array_size(track->lines) == 1);
        }
        else
        {
            line = gt_line_new();
            gt_array_add(track->lines, line);
        }
        gt_assert(line);
    }
    *result = line;
    return had_err;
}
/* Infers CDS features for the current mRNA from its exons and its start and
   stop codon. Nothing is done if a CDS is already present, if there are no
   exons, or unless exactly one start and exactly one stop codon are known.
   For every exon for which infer_cds_visitor_infer_range() reports a coding
   part, a "CDS" feature with that range is created, added as a child of the
   mRNA and stored in the visitor's CDS array. */
static void infer_cds_visitor_infer_cds(AgnInferCDSVisitor *v)
{
  /* explicitly annotated CDS: nothing to infer */
  if(gt_array_size(v->cds) > 0)
    return;
  /* inference needs exons and unambiguous codons */
  if(gt_array_size(v->exons) == 0)
    return;
  if(gt_array_size(v->starts) != 1 || gt_array_size(v->stops) != 1)
    return;

  GtGenomeNode **start_codon = gt_array_get(v->starts, 0);
  GtGenomeNode **stop_codon  = gt_array_get(v->stops,  0);
  if(start_codon == NULL || stop_codon == NULL)
    return;

  /* on the reverse strand the stop codon is the leftmost of the two */
  bool reverse = gt_feature_node_get_strand(v->mrna) == GT_STRAND_REVERSE;
  GtGenomeNode *left_codon  = reverse ? *stop_codon  : *start_codon;
  GtGenomeNode *right_codon = reverse ? *start_codon : *stop_codon;
  GtRange left_codon_range  = gt_genome_node_get_range(left_codon);
  GtRange right_codon_range = gt_genome_node_get_range(right_codon);

  GtUword k;
  for(k = 0; k < gt_array_size(v->exons); k++)
  {
    GtFeatureNode **exon = gt_array_get(v->exons, k);
    GtGenomeNode *exon_gn = (GtGenomeNode *)*exon;
    GtRange exon_range = gt_genome_node_get_range(exon_gn);
    GtStrand exon_strand = gt_feature_node_get_strand(*exon);

    GtRange cdsrange;
    if(!infer_cds_visitor_infer_range(&exon_range, &left_codon_range,
                                      &right_codon_range, &cdsrange))
    {
      continue; /* this exon has no coding part */
    }

    GtGenomeNode *cdsfeat = gt_feature_node_new(
                                gt_genome_node_get_seqid(exon_gn), "CDS",
                                cdsrange.start, cdsrange.end, exon_strand);
    if(v->source)
      gt_feature_node_set_source((GtFeatureNode *)cdsfeat, v->source);
    gt_feature_node_add_child(v->mrna, (GtFeatureNode *)cdsfeat);
    gt_array_add(v->cds, cdsfeat);
  }
}
Beispiel #24
0
/* Unit test for gaeval_visitor_introns_confirmed(): five introns are checked
   first against an empty gap array (expected 0.0) and then against five
   gaps of which three have exactly the coordinates of an intron
   (expected 0.6). */
static void gv_test_introns_confirmed(AgnUnitTest *test)
{
  static const GtUword intron_coords[][2] = {
    { 1000, 1170 }, { 1225, 1305 }, { 1950, 2110 },
    { 2545, 2655 }, { 2800, 2950 }
  };
  /* the 2nd and 4th gap deliberately differ from the matching intron */
  static const GtUword gap_coords[][2] = {
    { 1000, 1170 }, { 1225, 1302 }, { 1950, 2110 },
    { 2575, 2655 }, { 2800, 2950 }
  };
  const GtUword num_introns = sizeof intron_coords / sizeof intron_coords[0];
  const GtUword num_gaps = sizeof gap_coords / sizeof gap_coords[0];
  GtUword k;

  GtStr *seqid = gt_str_new_cstr("chr");
  GtArray *introns = gt_array_new( sizeof(GtGenomeNode *) );
  for(k = 0; k < num_introns; k++)
  {
    GtGenomeNode *intron = gt_feature_node_new(seqid, "intron",
                                               intron_coords[k][0],
                                               intron_coords[k][1],
                                               GT_STRAND_REVERSE);
    gt_array_add(introns, intron);
  }

  /* case 1: no gaps at all */
  GtArray *gaps = gt_array_new( sizeof(GtGenomeNode *) );
  double intcon = gaeval_visitor_introns_confirmed(introns, gaps);
  bool test1 = fabs(intcon - 0.0) < 0.0001;
  agn_unit_test_result(test, "introns confirmed (no gaps)", test1);

  /* case 2: three of five introns have an identical gap */
  for(k = 0; k < num_gaps; k++)
  {
    GtGenomeNode *gap = gt_feature_node_new(seqid, "match_gap",
                                            gap_coords[k][0],
                                            gap_coords[k][1],
                                            GT_STRAND_REVERSE);
    gt_array_add(gaps, gap);
  }
  intcon = gaeval_visitor_introns_confirmed(introns, gaps);
  bool test2 = fabs(intcon - 0.6) < 0.0001;
  agn_unit_test_result(test, "introns confirmed (gaps)", test2);

  /* tear down the fixtures */
  while(gt_array_size(introns) > 0)
  {
    GtGenomeNode **popped = gt_array_pop(introns);
    gt_genome_node_delete(*popped);
  }
  gt_array_delete(introns);

  while(gt_array_size(gaps) > 0)
  {
    GtGenomeNode **popped = gt_array_pop(gaps);
    gt_genome_node_delete(*popped);
  }
  gt_array_delete(gaps);
  gt_str_delete(seqid);
}
Beispiel #25
0
/* Replaces the forward ranges of <chain> by the merged second-sequence
   ranges (startpos2/endpos2) of all <fragments> overlapping the chain's
   genomic range. The collected ranges are sorted, and ranges that start at
   or before the end of the previously stored range are folded into it.
   If <comments> is set, the old forward ranges are printed to <outfp>
   first. */
static void enrich_chain(GthChain *chain, GtFragment *fragments,
                         unsigned long num_of_fragments, bool comments,
                         GtFile *outfp)
{
  GtRange dp_range, candidate, *tail;
  GtArray *enrichment;
  unsigned long idx;
  gt_assert(chain && fragments && num_of_fragments);

  if (comments) {
    gt_file_xprintf(outfp, "%c enrich global chain with the following "
                              "forward ranges:\n",COMMENTCHAR);
    gt_file_xprintf(outfp, "%c ", COMMENTCHAR);
    gt_ranges_show(chain->forwardranges, outfp);
  }

  /* get genomic range of DP range */
  dp_range = chain_get_genomicrange(chain);

  /* add each fragment which overlaps with the DP range to the enrichment */
  enrichment = gt_array_new(sizeof (GtRange));
  for (idx = 0; idx < num_of_fragments; idx++) {
    candidate.start = fragments[idx].startpos2;
    candidate.end   = fragments[idx].endpos2;
    if (gt_range_overlap(&dp_range, &candidate))
      gt_array_add(enrichment, candidate);
  }
  gt_assert(gt_array_size(enrichment));

  /* sort the enrichment */
  qsort(gt_array_get_space(enrichment), gt_array_size(enrichment),
        sizeof (GtRange), (GtCompare) gt_range_compare);

  /* rebuild the DP range array from the enrichment, starting with its
     first range */
  gt_array_reset(chain->forwardranges);
  candidate = *(GtRange*) gt_array_get_first(enrichment);
  gt_array_add(chain->forwardranges, candidate);
  for (idx = 1; idx < gt_array_size(enrichment); idx++) {
    candidate = *(GtRange*) gt_array_get(enrichment, idx);
    tail = gt_array_get_last(chain->forwardranges);
    if (candidate.start > tail->end) {
      /* no overlap with the last stored range -> save range */
      gt_array_add(chain->forwardranges, candidate);
      continue;
    }
    /* overlap found -> extend the last range, if necessary */
    if (tail->end < candidate.end)
      tail->end = candidate.end;
  }
  gt_array_delete(enrichment);
}
/* Write the GFF3 output for the feature node <fn> to the output file of the
   GFF3 visitor passed in <data>. After the leading part the attributes are
   written in a fixed order: the ID attribute (if a unique ID string is stored
   for <fn>), the Parent attribute (if parent IDs are stored for <fn>), and
   then whatever show_attribute() emits for the attributes of <fn>. If nothing
   was shown at all, a single '.' is written instead. The line is terminated
   with a newline. Always returns 0; <err> is checked but never set here. */
static int gff3_show_feature_node(GtFeatureNode *fn, void *data,
                                  GT_UNUSED GtError *err)
{
  GtGFF3Visitor *gff3_visitor = (GtGFF3Visitor*) data;
  ShowAttributeInfo info;
  GtArray *parent_ids;
  GtStr *unique_id;
  GtUword k, num_of_parents;
  bool anything_shown = false;

  gt_error_check(err);
  gt_assert(fn && gff3_visitor);

  /* everything that precedes the attributes */
  gt_gff3_output_leading(fn, gff3_visitor->outfp);

  /* ID attribute, only if a unique ID string is registered for this node */
  unique_id = gt_hashmap_get(gff3_visitor->feature_node_to_unique_id_str, fn);
  if (unique_id != NULL) {
    gt_file_xprintf(gff3_visitor->outfp, "%s=%s", GT_GFF_ID,
                    gt_str_get(unique_id));
    anything_shown = true;
  }

  /* Parent attribute: comma-separated list of the stored parent IDs */
  parent_ids = gt_hashmap_get(gff3_visitor->feature_node_to_id_array, fn);
  num_of_parents = gt_array_size(parent_ids);
  if (num_of_parents > 0) {
    if (anything_shown)
      gt_file_xfputc(';', gff3_visitor->outfp);
    gt_file_xprintf(gff3_visitor->outfp, "%s=", GT_GFF_PARENT);
    for (k = 0; k < num_of_parents; k++) {
      const char *parent_id = *(char**) gt_array_get(parent_ids, k);
      if (k > 0)
        gt_file_xfputc(',', gff3_visitor->outfp);
      gt_file_xprintf(gff3_visitor->outfp, "%s", parent_id);
    }
    anything_shown = true;
  }

  /* remaining attributes; show_attribute() gets a pointer to the flag so it
     can see (and presumably update) whether something was already shown */
  info.attribute_shown = &anything_shown;
  info.outfp = gff3_visitor->outfp;
  gt_feature_node_foreach_attribute(fn, show_attribute, &info);

  /* placeholder dot for an empty attribute column */
  if (!anything_shown)
    gt_file_xfputc('.', gff3_visitor->outfp);

  /* terminate the line */
  gt_file_xfputc('\n', gff3_visitor->outfp);

  return 0;
}
Beispiel #27
0
// Recursive Bron-Kerbosch enumeration over arrays of feature pointers.
//
// R holds the members chosen so far, P the candidates that may still be
// added, and X the nodes already handled at this level. When both P and X
// are empty, the members of R are collected into a new AgnTranscriptClique
// that is appended to `cliques`; if `skipsimplecliques` is true, an R with
// exactly one member is not reported. Otherwise each candidate in P is moved
// into a copy of R in turn and the function recurses on the neighbors of that
// candidate within P and X.
//
// P and X are modified: on return P is empty and every former element of P
// has been appended to X. R is left untouched.
void agn_bron_kerbosch( GtArray *R, GtArray *P, GtArray *X, GtArray *cliques,
                        bool skipsimplecliques )
{
  gt_assert(R != NULL && P != NULL && X != NULL && cliques != NULL);

  // Terminal case: nothing left to add and nothing excluded.
  if(gt_array_size(P) == 0 && gt_array_size(X) == 0)
  {
    if(!(skipsimplecliques && gt_array_size(R) == 1))
    {
      AgnTranscriptClique *found = agn_transcript_clique_new();
      GtUword k;
      for(k = 0; k < gt_array_size(R); k++)
      {
        GtFeatureNode *member = *(GtFeatureNode **)gt_array_get(R, k);
        agn_transcript_clique_add(found, member);
      }
      gt_array_add(cliques, found);
    }
    // P is empty here, so there is no candidate left to expand.
    return;
  }

  // Consume the candidates front to back until P is exhausted.
  while(gt_array_size(P) != 0)
  {
    GtGenomeNode *candidate = *(GtGenomeNode **)gt_array_get(P, 0);

    // extendedR = R \union {candidate}
    GtArray *extendedR = agn_gt_array_copy(R, sizeof(GtGenomeNode *));
    gt_array_add(extendedR, candidate);
    // narrowedP = P \intersect N(candidate)
    GtArray *narrowedP = agn_feature_neighbors(candidate, P);
    // narrowedX = X \intersect N(candidate)
    GtArray *narrowedX = agn_feature_neighbors(candidate, X);

    // Recurse on (R \union {v}, P \intersect N(v), X \intersect N(v))
    agn_bron_kerbosch(extendedR, narrowedP, narrowedX, cliques,
                      skipsimplecliques);

    // The three temporary arrays are only needed for the recursive call.
    gt_array_delete(extendedR);
    gt_array_delete(narrowedP);
    gt_array_delete(narrowedX);

    // Move the candidate from P to X: P := P \ {v}, X := X \union {v}
    gt_array_rem(P, 0);
    gt_array_add(X, candidate);
  }
}
/* Remove <cds_feature> from <fn> and attach it below every parent returned by
   find_cds_parents(). The first parent receives <cds_feature> itself, each
   further parent receives a clone of it. In both cases the Parent attribute
   of the attached node is set to the ID attribute of that parent. */
static void split_cds_feature(GtFeatureNode *cds_feature, GtFeatureNode *fn)
{
  GtArray *parents;
  unsigned long idx;
  gt_assert(cds_feature && fn);

  /* the parents have to be determined before the CDS feature is removed */
  parents = find_cds_parents(cds_feature, fn);
  gt_feature_node_remove_leaf(fn, cds_feature);

  for (idx = 0; idx < gt_array_size(parents); idx++) {
    GtFeatureNode *parent = *(GtFeatureNode**) gt_array_get(parents, idx);
    const char *parent_id = gt_feature_node_get_attribute(parent, GT_GFF_ID);
    /* the original node goes to the first parent, clones to all others */
    GtFeatureNode *to_attach = cds_feature;

    if (idx > 0)
      to_attach = gt_feature_node_clone(cds_feature);

    gt_feature_node_set_attribute(to_attach, GT_GFF_PARENT, parent_id);
    gt_feature_node_add_child(parent, to_attach);

    /* NOTE(review): for every additional parent the original node is
       deleted once; presumably this releases the reference that parent used
       to hold -- confirm against the reference counting of feature nodes */
    if (idx > 0)
      gt_genome_node_delete((GtGenomeNode*) cds_feature);
  }

  gt_array_delete(parents);
}
Beispiel #29
0
/* Post-process the GtRange elements stored in <intronstoprocess>.
   Every range whose length (end - start + 1) is at least
   2 * <icdelta> + <icminremintronlength> is kept, with its start moved up and
   its end moved down by <icdelta>. All shorter ranges are dropped. The
   relative order of the kept ranges is preserved and the result replaces the
   previous content of <intronstoprocess>. */
static void potentialintronspostpro(GtArray *intronstoprocess,
                                    unsigned long icdelta,
                                    unsigned long icminremintronlength)
{
  const unsigned long required_length = 2 * icdelta + icminremintronlength;
  GtArray *snapshot = gt_array_new(sizeof (GtRange));
  unsigned long idx;

  /* work from a copy of the input so the input array can be refilled */
  gt_array_add_array(snapshot, intronstoprocess);
  gt_array_set_size(intronstoprocess, 0);

  for (idx = 0; idx < gt_array_size(snapshot); idx++) {
    const GtRange *candidate = gt_array_get(snapshot, idx);
    unsigned long candidate_length = candidate->end - candidate->start + 1;
    GtRange trimmed;

    /* too short: this intron is skipped, i.e. it is not cut out later */
    if (candidate_length < required_length)
      continue;

    /* long enough: keep it, shrunk by the delta on both sides */
    trimmed = *candidate;
    trimmed.start += icdelta;
    trimmed.end -= icdelta;
    gt_array_add(intronstoprocess, trimmed);
  }

  gt_array_delete(snapshot);
}
Beispiel #30
0
/* Push exactly one value onto the stack of <L>: nil if <features> is NULL or
   empty, otherwise a new table that maps the indices 1..n to the genome nodes
   stored in <features>. A new reference is taken on each node before it is
   handed to gt_lua_genome_node_push(). */
static void push_features_as_table(lua_State *L, GtArray *features)
{
  unsigned long idx, count;

  if (!features || !gt_array_size(features)) {
    lua_pushnil(L);
    return;
  }

  count = gt_array_size(features);
  lua_newtable(L);
  for (idx = 0; idx < count; idx++) {
    GtGenomeNode *node = *(GtGenomeNode**) gt_array_get(features, idx);
    lua_pushinteger(L, idx + 1); /* Lua tables are indexed from 1 */
    gt_lua_genome_node_push(L, gt_genome_node_ref(node));
    /* table[key] = value; the table sits below key and value at index -3 */
    lua_rawset(L, -3);
  }
}