Esempio n. 1
0
static void create_transitive_part_of_edges(GtTypeNode *node,
                                            GtBoolMatrix *part_of_out_edges,
                                            GtBoolMatrix *part_of_in_edges,
                                            GtArray *node_stack)
{
  unsigned long i, j;
  if (gt_array_size(node_stack)) {
    for (i  = gt_bool_matrix_get_first_column(part_of_in_edges, node->num);
         i != gt_bool_matrix_get_last_column(part_of_in_edges, node->num);
         i  = gt_bool_matrix_get_next_column(part_of_in_edges, node->num, i)) {
      for (j = 0; j < gt_array_size(node_stack); j++) {
        GtTypeNode *child = *(GtTypeNode**) gt_array_get(node_stack, j);
        gt_bool_matrix_set(part_of_out_edges, i, child->num, true);
        gt_bool_matrix_set(part_of_in_edges, child->num, i, true);
      }
    }
  }
  gt_array_add(node_stack, node);
  for (i = 0; i < gt_array_size(node->is_a_out_edges); i++) {
    GtTypeNode *parent = *(GtTypeNode**) gt_array_get(node->is_a_out_edges, i);
    create_transitive_part_of_edges(parent, part_of_out_edges, part_of_in_edges,
                                    node_stack);
  }
  gt_array_pop(node_stack);
}
Esempio n. 2
0
static GtArray *gaeval_visitor_union(GtArray *cov1, GtArray *cov2)
{
  agn_assert(cov1 && cov2);
  gt_array_add_array(cov1, cov2);
  if(gt_array_size(cov1) > 1)
    gt_array_sort(cov1, (GtCompare)gt_range_compare);

  GtArray *runion = gt_array_new(sizeof(GtRange));
  if(gt_array_size(cov1) == 0)
    return runion;
  GtRange *rng = gt_array_get(cov1, 0);
  gt_array_add(runion, *rng);
  GtRange *prev = gt_array_get(runion, 0);
  if(gt_array_size(cov1) == 1)
    return runion;

  GtUword i;
  for(i = 1; i < gt_array_size(cov1); i++)
  {
    rng = gt_array_get(cov1, i);
    if(gt_range_overlap(rng, prev))
      *prev = gt_range_join(rng, prev);
    else
    {
      gt_array_add(runion, *rng);
      prev = gt_array_get(runion, gt_array_size(runion) - 1);
    }
  }

  return runion;
}
int gt_feature_index_add_gff3file(GtFeatureIndex *feature_index,
                                  const char *gff3file, GtError *err)
{
  GtNodeStream *gff3_in_stream;
  GtGenomeNode *gn;
  GtArray *tmp;
  int had_err = 0;
  GtUword i;
  gt_error_check(err);
  gt_assert(feature_index && gff3file);
  tmp = gt_array_new(sizeof (GtGenomeNode*));
  gff3_in_stream = gt_gff3_in_stream_new_unsorted(1, &gff3file);
  while (!(had_err = gt_node_stream_next(gff3_in_stream, &gn, err)) && gn)
    gt_array_add(tmp, gn);
  if (!had_err) {
    GtNodeVisitor *feature_visitor = gt_feature_visitor_new(feature_index);
    for (i=0;i<gt_array_size(tmp);i++) {
      gn = *(GtGenomeNode**) gt_array_get(tmp, i);
      /* no need to lock, add_*_node() is synchronized */
      had_err = gt_genome_node_accept(gn, feature_visitor, NULL);
      gt_assert(!had_err); /* cannot happen */
    }
    gt_node_visitor_delete(feature_visitor);
  }
  gt_node_stream_delete(gff3_in_stream);
  for (i=0;i<gt_array_size(tmp);i++)
    gt_genome_node_delete(*(GtGenomeNode**) gt_array_get(tmp, i));
  gt_array_delete(tmp);
  return had_err;
}
Esempio n. 4
0
static void infer_cds_visitor_check_cds_multi(AgnInferCDSVisitor *v)
{
  if(gt_array_size(v->cds) <= 1)
  {
    return;
  }

  GtFeatureNode **firstsegment = gt_array_get(v->cds, 0);
  const char *id = gt_feature_node_get_attribute(*firstsegment, "ID");
  if(id == NULL)
  {
    char newid[64];
    sprintf(newid, "CDS%lu", v->cdscounter++);
    gt_feature_node_add_attribute(*firstsegment, "ID", newid);
  }
  gt_feature_node_make_multi_representative(*firstsegment);
  GtUword i;
  for(i = 0; i < gt_array_size(v->cds); i++)
  {
    GtFeatureNode **segment = gt_array_get(v->cds, i);
    if(!gt_feature_node_is_multi(*segment))
    {
      gt_feature_node_set_multi_representative(*segment, *firstsegment);
    }
  }
}
Esempio n. 5
0
void gt_ranges_copy_to_opposite_strand(GtArray *outranges,
                                       const GtArray *inranges,
                                       GtUword gen_total_length,
                                       GtUword gen_offset)
{
  GtRange range;
  GtUword i;

  /* outranges are empty */
  gt_assert(!gt_array_size(outranges));
  /* inranges are not empty */
  gt_assert(gt_array_size(inranges));

  for (i = gt_array_size(inranges); i > 0; i--) {
    /* genomic offset is defined */
    gt_assert(gen_offset != GT_UNDEF_UWORD);
    range.start  = gen_total_length - 1
                  - (((GtRange*) gt_array_get(inranges, i-1))->end -
                     gen_offset)
                  + gen_offset;
    range.end = gen_total_length - 1
                  - (((GtRange*) gt_array_get(inranges, i-1))->start -
                     gen_offset)
                  + gen_offset;
    gt_array_add(outranges, range);
  }

  /* outranges has the same number of elements as inranges */
  gt_assert(gt_array_size(inranges) == gt_array_size(outranges));
}
Esempio n. 6
0
static void xml_outputSCRline(const GthAGS *ags, unsigned int indentlevel,
                              GtFile *outfp)
{
  GthSpliceSiteProb *splicesiteprob;
  unsigned long i;

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<SCR_line>\n");
  indentlevel++;

  for (i = 0; i < gt_array_size(ags->exons) - 1; i++) {
    splicesiteprob = (GthSpliceSiteProb*) gt_array_get(ags->splicesiteprobs, i);
    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "<exon-intron don_prob=\"%.3f\" "
                       "acc_prob=\"%.3f\" e_score=\"%.3f\"/>\n",
                       splicesiteprob->donorsiteprob,
                       splicesiteprob->acceptorsiteprob,
                       ((GthExonAGS*) gt_array_get(ags->exons, i))->score);
  }

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<exon-only e_score=\"%.3f\"/>\n",
                  ((GthExonAGS*) gt_array_get(ags->exons, i))->score);
  indentlevel--;
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "</SCR_line>\n");
}
Esempio n. 7
0
static void convert_chain_to_inverted_chain(GthInvertedChain *inverted_chain,
                                            GthChain *chain)
{
  unsigned long i, lastexonnum = gt_array_size(chain->forwardranges) - 1;
  GtRange range;

  /* inverted chain is empty */
  gt_assert(!gt_array_size(inverted_chain->forwardranges));
  /* chain is not empty */
  gt_assert(gt_array_size(chain->forwardranges));

  /* copy file and sequence numbers */
  inverted_chain->gen_file_num = chain->gen_file_num;
  inverted_chain->gen_seq_num  = chain->gen_seq_num;
  inverted_chain->ref_file_num = chain->ref_file_num;
  inverted_chain->ref_seq_num  = chain->ref_seq_num;

  /* save startpos */
  inverted_chain->startpos = ((GtRange*)
                              gt_array_get_first(chain->forwardranges))->start;

  /* save endpos */
  inverted_chain->endpos = ((GtRange*)
                             gt_array_get_last(chain->forwardranges))->end;

  /* convert (potential) exons to (potential) introns */
  for (i = 0; i < lastexonnum; i++) {
    range.start  = ((GtRange*) gt_array_get(chain->forwardranges, i))
                  ->end + 1;
    range.end = ((GtRange*) gt_array_get(chain->forwardranges, i+1))
                  ->start - 1;
    gt_array_add(inverted_chain->forwardranges, range);
  }
}
Esempio n. 8
0
static int gt_sort_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                               GtError *err)
{
  GtSortStream *sort_stream;
  GtGenomeNode *node, *eofn;
  int had_err = 0;
  gt_error_check(err);
  sort_stream = gt_sort_stream_cast(ns);

  if (!sort_stream->sorted) {
    while (!(had_err = gt_node_stream_next(sort_stream->in_stream, &node,
                                           err)) && node) {
      if ((eofn = gt_eof_node_try_cast(node)))
        gt_genome_node_delete(eofn); /* get rid of EOF nodes */
      else
        gt_array_add(sort_stream->nodes, node);
    }
    if (!had_err) {
      gt_genome_nodes_sort_stable(sort_stream->nodes);
      sort_stream->sorted = true;
    }
  }

  if (!had_err) {
    gt_assert(sort_stream->sorted);
    if (sort_stream->idx < gt_array_size(sort_stream->nodes)) {
      *gn = *(GtGenomeNode**) gt_array_get(sort_stream->nodes,
                                           sort_stream->idx);
      sort_stream->idx++;
      /* join region nodes with the same sequence ID */
      if (gt_region_node_try_cast(*gn)) {
        GtRange range_a, range_b;
        while (sort_stream->idx < gt_array_size(sort_stream->nodes)) {
          node = *(GtGenomeNode**) gt_array_get(sort_stream->nodes,
                                                sort_stream->idx);
          if (!gt_region_node_try_cast(node) ||
              gt_str_cmp(gt_genome_node_get_seqid(*gn),
                         gt_genome_node_get_seqid(node))) {
            /* the next node is not a region node with the same ID */
            break;
          }
          range_a = gt_genome_node_get_range(*gn);
          range_b = gt_genome_node_get_range(node);
          range_a = gt_range_join(&range_a, &range_b);
          gt_genome_node_set_range(*gn, &range_a);
          gt_genome_node_delete(node);
          sort_stream->idx++;
        }
      }
      return 0;
    }
  }

  if (!had_err) {
    gt_array_reset(sort_stream->nodes);
    *gn = NULL;
  }

  return had_err;
}
Esempio n. 9
0
static double gaeval_visitor_introns_confirmed(GtArray *introns, GtArray *gaps)
{
  agn_assert(introns && gaps);
  GtUword intron_count = gt_array_size(introns);
  GtUword gap_count = gt_array_size(gaps);
  agn_assert(intron_count > 0);

  if(gap_count == 0)
    return 0.0;

  GtUword i, j, num_confirmed = 0;
  for(i = 0; i < intron_count; i++)
  {
    GtGenomeNode *intron = *(GtGenomeNode **)gt_array_get(introns, i);
    GtRange intron_range = gt_genome_node_get_range(intron);
    for(j = 0; j < gap_count; j++)
    {
      GtGenomeNode *gap = *(GtGenomeNode **)gt_array_get(gaps, j);
      GtRange gap_range = gt_genome_node_get_range(gap);
      if(gt_range_compare(&intron_range, &gap_range) == 0)
      {
        num_confirmed++;
        break;
      }
    }
  }

  return (double)num_confirmed / (double)intron_count;
}
static void snp_annotator_stream_free(GtNodeStream *ns)
{
  GtUword i;
  GtSNPAnnotatorStream *sas;
  if (!ns) return;
  sas = gt_snp_annotator_stream_cast(ns);
  gt_region_mapping_delete(sas->rmap);
  while (gt_queue_size(sas->snps) > 0) {
    gt_genome_node_delete((GtGenomeNode*) gt_queue_get(sas->snps));
  }
  while (gt_queue_size(sas->outqueue) > 0) {
    gt_genome_node_delete((GtGenomeNode*) gt_queue_get(sas->outqueue));
  }
  for (i = 0; i < gt_array_size(sas->instreams); i++) {
    gt_node_stream_delete(*(GtNodeStream**) gt_array_get(sas->instreams, i));
  }
  for (i = 0; i < gt_array_size(sas->cur_gene_set); i++) {
    gt_genome_node_delete(*(GtGenomeNode**) gt_array_get(sas->cur_gene_set, i));
  }
  gt_array_delete(sas->cur_gene_set);
  gt_node_stream_delete(sas->merge_stream);
  gt_array_delete(sas->instreams);
  gt_queue_delete(sas->snps);
  gt_queue_delete(sas->outqueue);
}
Esempio n. 11
0
static void gv_test_calc_integrity(AgnUnitTest *test)
{
  const char *filename = "data/gff3/gaeval-stream-unit-test-2.gff3";
  GtNodeStream *align_in = gt_gff3_in_stream_new_unsorted(1, &filename);
  AgnGaevalParams params = { 0.6, 0.3, 0.05, 0.05, 400, 200, 100 };
  GtNodeVisitor *nv = agn_gaeval_visitor_new(align_in, params);
  AgnGaevalVisitor *gv = gaeval_visitor_cast(nv);
  gt_node_stream_delete(align_in);

  GtNodeStream *gff3in = gt_gff3_in_stream_new_unsorted(1, &filename);
  GtHashmap *typestokeep = gt_hashmap_new(GT_HASH_STRING, NULL, NULL);
  gt_hashmap_add(typestokeep, "mRNA", "mRNA");
  GtNodeStream *filtstream = agn_filter_stream_new(gff3in, typestokeep);
  GtLogger *logger = gt_logger_new(true, "", stderr);
  GtNodeStream *ics = agn_infer_cds_stream_new(filtstream, NULL, logger);
  GtNodeStream *ies = agn_infer_exons_stream_new(ics, NULL, logger);

  GtError *error = gt_error_new();
  GtArray *feats = gt_array_new( sizeof(GtFeatureNode *) );
  GtNodeStream *featstream = gt_array_out_stream_new(ies, feats, error);
  int result = gt_node_stream_pull(featstream, error);
  if(result == -1)
  {
    fprintf(stderr, "[AgnGaevalVisitor::gv_test_calc_integrity] error "
            "processing GFF3: %s\n", gt_error_get(error));
    return;
  }
  gt_node_stream_delete(gff3in);
  gt_node_stream_delete(filtstream);
  gt_node_stream_delete(featstream);
  gt_node_stream_delete(ics);
  gt_node_stream_delete(ies);
  gt_logger_delete(logger);
  gt_hashmap_delete(typestokeep);

  agn_assert(gt_array_size(feats) == 2);
  GtFeatureNode *g1 = *(GtFeatureNode **)gt_array_get(feats, 0);
  GtFeatureNode *g2 = *(GtFeatureNode **)gt_array_get(feats, 1);

  double cov1 = gaeval_visitor_calculate_coverage(gv,  g1, error);
  double cov2 = gaeval_visitor_calculate_coverage(gv,  g2, error);
  double int1 = gaeval_visitor_calculate_integrity(gv, g1, cov1, NULL, error);
  double int2 = gaeval_visitor_calculate_integrity(gv, g2, cov2, NULL, error);

  bool test1 = fabs(cov1 - 1.000) < 0.001 &&
               fabs(cov2 - 0.997) < 0.001 &&
               fabs(int1 - 0.850) < 0.001 &&
               fabs(int2 - 0.863) < 0.001;
  agn_unit_test_result(test, "calculate integrity", test1);

  gt_error_delete(error);
  gt_array_delete(feats);
  gt_genome_node_delete((GtGenomeNode *)g1);
  gt_genome_node_delete((GtGenomeNode *)g2);
  gt_node_visitor_delete(nv);
}
Esempio n. 12
0
static int gt_ltr_cluster_stream_next(GtNodeStream *ns,
                                      GtGenomeNode **gn,
                                      GtError *err)
{
  GtLTRClusterStream *lcs;
  GtGenomeNode *ref_gn;
  int had_err = 0;
  unsigned long i = 0;

  gt_error_check(err);
  lcs = gt_ltr_cluster_stream_cast(ns);
  if (lcs->first_next) {
    while (!(had_err = gt_node_stream_next(lcs->in_stream, gn, err)) && *gn) {
      gt_assert(*gn && !had_err);
      ref_gn = gt_genome_node_ref(*gn);
      gt_array_add(lcs->nodes, ref_gn);
      had_err = gt_genome_node_accept(*gn, (GtNodeVisitor*) lcs->lcv, err);
      if (had_err) {
        gt_genome_node_delete(*gn);
        *gn = NULL;
        break;
      }
    }
    lcs->feat_to_encseq =
                       gt_ltr_cluster_prepare_seq_visitor_get_encseqs(lcs->lcv);
    lcs->feat_to_encseq_keys =
                      gt_ltr_cluster_prepare_seq_visitor_get_features(lcs->lcv);
    if (!had_err) {
      for (i = 0; i < gt_str_array_size(lcs->feat_to_encseq_keys); i++) {
        had_err = process_feature(lcs,
                                  gt_str_array_get(lcs->feat_to_encseq_keys, i),
                                  err);
        if (had_err)
          break;
      }
    }
    if (!had_err) {
      *gn = *(GtGenomeNode**) gt_array_get(lcs->nodes, lcs->next_index);
      lcs->next_index++;
      lcs->first_next = false;
      return 0;
    }
  } else {
    if (lcs->next_index >= gt_array_size(lcs->nodes))
      *gn = NULL;
    else {
      *gn = *(GtGenomeNode**) gt_array_get(lcs->nodes, lcs->next_index);
      lcs->next_index++;
    }
    return 0;
  }

  return had_err;
}
Esempio n. 13
0
static void sort_matches_and_calc_buckets(GtArray *matches, GtArray *buckets,
                                          GtUword *maxbucketlength)
{
  GtUword i, currentstart = 0, currentend = 0;
  GthMatch *matchptr;
  Bucket bucket, *bucketptr;

  gt_assert(gt_array_size(matches));

  /* sort matches */
  qsort(gt_array_get_space(matches), gt_array_size(matches), sizeof (GthMatch),
        compare_matches);

  /* init first bucket */
  matchptr = gt_array_get_first(matches);
  bucket.seqnum1  = matchptr->Storeseqnumreference;
  bucket.seqnum2  = matchptr->Storeseqnumgenomic;
  bucket.startpos = 0;

  /* calc buckets */
  for (i = 1; i < gt_array_size(matches); i++) {
    matchptr = gt_array_get(matches, i);
    if (matchptr->Storeseqnumreference != bucket.seqnum1 ||
        matchptr->Storeseqnumgenomic != bucket.seqnum2) {
      /* save the current bucket */
      currentend    = i - 1;
      bucket.length = currentend - currentstart + 1;
      gt_array_add(buckets, bucket);

      /* create new bucket */
      currentstart    = i;
      bucket.seqnum1  = matchptr->Storeseqnumreference;
      bucket.seqnum2  = matchptr->Storeseqnumgenomic;
      bucket.startpos = i;
    }
  }

  /* save last bucket */
  currentend = i - 1;
  bucket.length = currentend - currentstart + 1;
  gt_array_add(buckets, bucket);

  /* compute maximum bucket length */
  *maxbucketlength = 0;
  for (i = 0; i < gt_array_size(buckets); i++) {
    bucketptr = gt_array_get(buckets, i);
    if (bucketptr->length > *maxbucketlength)
      *maxbucketlength = bucketptr->length;
  }

  gt_assert(sum_of_bucket_lengths_equals_num_of_matches(buckets,
                                                     gt_array_size(matches)));
}
Esempio n. 14
0
void agn_transcript_structure_gbk(GtFeatureNode *transcript, FILE *outstream)
{
  gt_assert(transcript && outstream);

  GtArray *exons = gt_array_new( sizeof(GtFeatureNode *) );
  GtFeatureNodeIterator *iter = gt_feature_node_iterator_new_direct(transcript);
  GtFeatureNode *child;
  for
  (
    child = gt_feature_node_iterator_next(iter);
    child != NULL;
    child = gt_feature_node_iterator_next(iter)
  )
  {
    if(agn_gt_feature_node_is_exon_feature(child))
      gt_array_add(exons, child);
  }
  gt_feature_node_iterator_delete(iter);

  gt_assert(gt_array_size(exons) > 0);
  gt_array_sort(exons, (GtCompare)agn_gt_genome_node_compare);

  if(gt_feature_node_get_strand(transcript) == GT_STRAND_REVERSE)
    fputs("complement(", outstream);

  if(gt_array_size(exons) == 1)
  {
    GtGenomeNode *exon = *(GtGenomeNode **)gt_array_get(exons, 0);
    GtRange exonrange = gt_genome_node_get_range(exon);
    fprintf(outstream, "<%lu..>%lu", exonrange.start, exonrange.end);
  }
  else
  {
    fputs("join(", outstream);
    GtUword i;
    for(i = 0; i < gt_array_size(exons); i++)
    {
      GtGenomeNode *exon = *(GtGenomeNode **)gt_array_get(exons, i);
      GtRange exonrange = gt_genome_node_get_range(exon);

      if(i == 0)
        fprintf(outstream, "<%lu..%lu", exonrange.start, exonrange.end);
      else if(i+1 == gt_array_size(exons))
        fprintf(outstream, ",%lu..>%lu", exonrange.start, exonrange.end);
      else
        fprintf(outstream, ",%lu..%lu", exonrange.start, exonrange.end);
    }
    fputs(")", outstream);
  }

  if(gt_feature_node_get_strand(transcript) == GT_STRAND_REVERSE)
    fputs(")", outstream);
}
static void gt_hmmer_model_hit_delete(GtHMMERModelHit *mh)
{
  unsigned long i;
  if (!mh) return;
  for (i = 0; i < gt_array_size(mh->fwd_hits); i++)
    gt_free(*(GtHMMERSingleHit**) gt_array_get(mh->fwd_hits, i));
  gt_array_delete(mh->fwd_hits);
  for (i = 0; i < gt_array_size(mh->rev_hits); i++)
    gt_free(*(GtHMMERSingleHit**) gt_array_get(mh->rev_hits, i));
   gt_array_delete(mh->rev_hits);
  gt_free(mh);
}
Esempio n. 16
0
static GtArray*
gaeval_visitor_intersect(GtGenomeNode *genemodel, GtGenomeNode *alignment)
{
  agn_assert(genemodel && alignment);

  GtFeatureNode *genefn = gt_feature_node_cast(genemodel);
  GtFeatureNode *algnfn = gt_feature_node_cast(alignment);
  agn_assert(gt_feature_node_has_type(genefn, "mRNA"));
  GtStrand genestrand = gt_feature_node_get_strand(genefn);
  GtStrand algnstrand = gt_feature_node_get_strand(algnfn);
  if(genestrand != algnstrand)
    return NULL;

  GtArray *covered_parts = gt_array_new( sizeof(GtRange) );
  GtArray *exons = agn_typecheck_select(genefn, agn_typecheck_exon);
  GtWord i;
  for(i = 0; i < gt_array_size(exons); i++)
  {
    GtGenomeNode *exon = *(GtGenomeNode **)gt_array_get(exons, i);
    GtRange exonrange = gt_genome_node_get_range(exon);

    GtFeatureNodeIterator *aniter = gt_feature_node_iterator_new(algnfn);
    GtFeatureNode *tempaln;
    GtRange nullrange = {0, 0};
    for(tempaln  = gt_feature_node_iterator_next(aniter);
        tempaln != NULL;
        tempaln  = gt_feature_node_iterator_next(aniter))
    {
      if(gt_feature_node_has_type(tempaln, "match_gap"))
        continue;

      GtRange alnrange = gt_genome_node_get_range((GtGenomeNode *) tempaln);
      GtRange intr = gaeval_visitor_range_intersect(&exonrange, &alnrange);
      if(gt_range_compare(&intr, &nullrange) != 0)
        gt_array_add(covered_parts, intr);
    }
    gt_feature_node_iterator_delete(aniter);
  }
  gt_array_delete(exons);

  for(i = 0; i < gt_array_size(covered_parts); i++)
  {
    GtRange *r1 = gt_array_get(covered_parts, i);
    GtUword j;
    for(j = i+1; j < gt_array_size(covered_parts); j++)
    {
      GtRange *r2 = gt_array_get(covered_parts, j);
      agn_assert(gt_range_overlap(r1, r2) == false);
    }
  }

  return covered_parts;
}
Esempio n. 17
0
bool gt_ranges_do_not_overlap(const GtArray *ranges)
{
  GtUword i;

  gt_assert(ranges && gt_array_size(ranges));

  for (i = 1; i < gt_array_size(ranges); i++) {
    if (gt_range_overlap(gt_array_get(ranges, i-1), gt_array_get(ranges, i)))
      return false;
  }
  return true;
}
Esempio n. 18
0
static int gt_ltrdigest_pdom_visitor_process_hit(GT_UNUSED void *key, void *val,
                                                 void *data,
                                                 GT_UNUSED GtError *err)
{
  GtHMMERModelHit *mh = (GtHMMERModelHit*) val;
  GtLTRdigestPdomVisitor *lv = (GtLTRdigestPdomVisitor*) data;
  const char *mdl = (const char*) key;
  GtArray *hits = NULL;
  GtUword nof_hits;
  GtFragment *frags;

  if (gt_double_compare(mh->best_fwd, mh->best_rev) <= 0)
    hits = mh->fwd_hits;
  else
    hits = mh->rev_hits;
  gt_assert(hits);
  nof_hits = gt_array_size(hits);
  if (nof_hits == 0) return 0;

  if (nof_hits > 1UL) {
    GtUword i, chainno;
    frags = gt_malloc((size_t) nof_hits * sizeof (GtFragment));
    for (i = 0; i < nof_hits; i++) {
      GtHMMERSingleHit *h = *(GtHMMERSingleHit**) gt_array_get(hits, i);
      gt_assert(h);
      frags[i].startpos1 = h->hmmfrom;
      frags[i].endpos1   = h->hmmto;
      frags[i].startpos2 = h->alifrom;
      frags[i].endpos2   = h->alito;
      frags[i].weight    = (GtWord) (h->alito - h->alifrom + 1) * h->score;
      frags[i].data      = h;
    }
    qsort(frags, (size_t) nof_hits, sizeof (GtFragment),
          gt_ltrdigest_pdom_visitor_fragcmp);
    gt_log_log("%s: chaining "GT_WU" frags", mdl, nof_hits);
    gt_globalchaining_max(frags, nof_hits,
                         (GtUword) lv->chain_max_gap_length,
                         gt_ltrdigest_pdom_visitor_chainproc, &chainno);
    gt_free(frags);
    for (i = 0; i < nof_hits; i++) {
      GtHMMERSingleHit *h = *(GtHMMERSingleHit**) gt_array_get(hits, i);
      (void) gt_ltrdigest_pdom_visitor_attach_hit(lv, mh, h);
    }
  } else {
    GtUword chainno = 0UL;
    GtHMMERSingleHit *h = *(GtHMMERSingleHit**) gt_array_get(hits, 0);
    gt_array_add(h->chains, chainno);
    (void) gt_ltrdigest_pdom_visitor_attach_hit(lv, mh, h);
  }

  return 0;
}
Esempio n. 19
0
static int get_next_free_line(GtTrack *track, GtLine **result, GtBlock *block,
                              GtError *err)
{
    unsigned long i;
    GtLine* line;
    int had_err = 0;
    bool is_occupied;
    gt_assert(track);

    /* find unoccupied line -- may need optimisation */
    for (i = 0; i < gt_array_size(track->lines); i++) {
        line = *(GtLine**) gt_array_get(track->lines, i);
        had_err = gt_line_breaker_line_is_occupied(track->lb, &is_occupied, line,
                  block, err);
        if (had_err)
            break;
        if (!is_occupied) {
            *result = line;
            return 0;
        }
    }
    /* all lines are occupied, we need o create a new one */
    if (!had_err) {
        /* if line limit is hit, do not create any more lines! */
        if (track->max_num_lines != GT_UNDEF_ULONG
                && gt_array_size(track->lines) == track->max_num_lines)
        {
            track->discarded_blocks++;
            *result = NULL;
        }
        /* make sure there is only one line if 'split_lines' is set to false */
        if (!track->split)
        {
            if (gt_array_size(track->lines) < 1)
            {
                line = gt_line_new();
                gt_array_add(track->lines, line);
            }
            else
                line = *(GtLine**) gt_array_get(track->lines, 0);
            gt_assert(gt_array_size(track->lines) == 1);
        }
        else
        {
            line = gt_line_new();
            gt_array_add(track->lines, line);
        }
        gt_assert(line);
    }
    *result = line;
    return had_err;
}
Esempio n. 20
0
static void infer_cds_visitor_check_stop(AgnInferCDSVisitor *v)
{
  if(gt_array_size(v->cds) == 0)
    return;

  const char *mrnaid = gt_feature_node_get_attribute(v->mrna, "ID");
  unsigned int ln = gt_genome_node_get_line_number((GtGenomeNode *)v->mrna);
  GtStrand strand = gt_feature_node_get_strand(v->mrna);

  GtRange stoprange;
  GtUword threeprimeindex = gt_array_size(v->cds) - 1;
  GtGenomeNode **threeprimesegment = gt_array_get(v->cds, threeprimeindex);
  stoprange = gt_genome_node_get_range(*threeprimesegment);
  stoprange.start = stoprange.end - 2;
  if(strand == GT_STRAND_REVERSE)
  {
    threeprimesegment = gt_array_get(v->cds, 0);
    stoprange = gt_genome_node_get_range(*threeprimesegment);
    stoprange.end = stoprange.start + 2;
  }

  if(gt_array_size(v->stops) > 1)
  {
    gt_logger_log(v->logger, "mRNA '%s' (line %u) has %lu stop codons", mrnaid,
                  ln, gt_array_size(v->starts));
  }
  else if(gt_array_size(v->stops) == 1)
  {
    GtGenomeNode **codon = gt_array_get(v->stops, 0);
    GtRange testrange = gt_genome_node_get_range(*codon);
    if(gt_range_compare(&stoprange, &testrange) != 0)
    {
      gt_logger_log(v->logger, "stop codon inferred from CDS [%lu, %lu] does "
                    "not match explicitly provided stop codon [%lu, %lu] for "
                    "mRNA '%s'", stoprange.start, stoprange.end,
                    testrange.start, testrange.end, mrnaid);
    }
  }
  else // agn_assert(gt_array_size(v->stops) == 0)
  {
    GtStr *seqid = gt_genome_node_get_seqid((GtGenomeNode *)v->mrna);
    GtGenomeNode *codonfeature = gt_feature_node_new(seqid, "stop_codon",
                                                     stoprange.start,
                                                     stoprange.end,
                                                     strand);
    if(v->source)
      gt_feature_node_set_source((GtFeatureNode *)codonfeature, v->source);
    GtFeatureNode *cf = (GtFeatureNode *)codonfeature;
    gt_feature_node_add_child(v->mrna, cf);
    gt_array_add(v->stops, cf);
  }
}
Esempio n. 21
0
static void infer_cds_visitor_infer_cds(AgnInferCDSVisitor *v)
{
  GtFeatureNode **start_codon = NULL, **stop_codon = NULL;

  bool exonsexplicit    = gt_array_size(v->exons) > 0;
  bool startcodon_check = gt_array_size(v->starts) == 1 &&
                          (start_codon = gt_array_get(v->starts, 0)) != NULL;
  bool stopcodon_check  = gt_array_size(v->stops)  == 1 &&
                          (stop_codon  = gt_array_get(v->stops,  0)) != NULL;

  if(gt_array_size(v->cds) > 0)
  {
    return;
  }
  else if(!exonsexplicit || !startcodon_check || !stopcodon_check)
  {
    return;
  }

  GtRange left_codon_range, right_codon_range;
  left_codon_range  = gt_genome_node_get_range(*(GtGenomeNode **)start_codon);
  right_codon_range = gt_genome_node_get_range(*(GtGenomeNode **)stop_codon);
  if(gt_feature_node_get_strand(v->mrna) == GT_STRAND_REVERSE)
  {
    left_codon_range  = gt_genome_node_get_range(*(GtGenomeNode **)stop_codon);
    right_codon_range = gt_genome_node_get_range(*(GtGenomeNode **)start_codon);
  }
  GtUword i;
  for(i = 0; i < gt_array_size(v->exons); i++)
  {
    GtFeatureNode *exon = *(GtFeatureNode **)gt_array_get(v->exons, i);
    GtGenomeNode *exon_gn = (GtGenomeNode *)exon;
    GtRange exon_range = gt_genome_node_get_range(exon_gn);
    GtStrand exon_strand = gt_feature_node_get_strand(exon);

    GtRange cdsrange;
    bool exon_includes_cds = infer_cds_visitor_infer_range(&exon_range,
                                                           &left_codon_range,
                                                           &right_codon_range,
                                                           &cdsrange);
    if(exon_includes_cds)
    {
      GtGenomeNode *cdsfeat;
      cdsfeat = gt_feature_node_new(gt_genome_node_get_seqid(exon_gn), "CDS",
                                    cdsrange.start, cdsrange.end, exon_strand);
      if(v->source)
        gt_feature_node_set_source((GtFeatureNode *)cdsfeat, v->source);
      gt_feature_node_add_child(v->mrna, (GtFeatureNode *)cdsfeat);
      gt_array_add(v->cds, cdsfeat);
    }
  }
}
Esempio n. 22
0
bool gt_ranges_are_sorted(const GtArray *ranges)
{
  GtUword i;

  gt_assert(ranges);

  for (i = 1; i < gt_array_size(ranges); i++) {
    if (gt_range_compare(gt_array_get(ranges, i-1),
                         gt_array_get(ranges, i)) == 1) {
      return false;
    }
  }
  return true;
}
Esempio n. 23
0
static int gtf_show_transcript(GtFeatureNode *feature_node,
                               GtGTFVisitor *gtf_visitor, GtError *err)
{
  GtFeatureNode *fn;
  GtUword i;
  int had_err;
  gt_error_check(err);
  gt_assert(feature_node && gtf_visitor);
  gt_array_reset(gtf_visitor->exon_features);
  gt_array_reset(gtf_visitor->CDS_features);
  had_err = gt_feature_node_traverse_direct_children(feature_node, gtf_visitor,
                                                     save_exon_node, err);
  if (gt_array_size(gtf_visitor->exon_features)) {
    /* sort exon features */
    qsort(gt_array_get_space(gtf_visitor->exon_features),
          gt_array_size(gtf_visitor->exon_features), sizeof (GtGenomeNode*),
          (GtCompare) gt_genome_node_compare);
    /* show exon features */
    gtf_visitor->transcript_id++;
    for (i = 0; i < gt_array_size(gtf_visitor->exon_features); i++) {
      fn = *(GtFeatureNode**) gt_array_get(gtf_visitor->exon_features, i);
      gt_gff3_output_leading(fn, gtf_visitor->outfp);
      gt_file_xprintf(gtf_visitor->outfp, "gene_id \""GT_WU"\"; transcript_id "
                      "\""GT_WU"."GT_WU"\";\n", gtf_visitor->gene_id,
                      gtf_visitor->gene_id, gtf_visitor->transcript_id);
    }
  }
  if (gt_array_size(gtf_visitor->CDS_features)) {
    /* sort CDS features */
    qsort(gt_array_get_space(gtf_visitor->CDS_features),
          gt_array_size(gtf_visitor->CDS_features), sizeof (GtGenomeNode*),
          (GtCompare) gt_genome_node_compare);
    /* show start_codon feature */
    /* fn = *(GtFeatureNode**) */ (void) gt_array_get(gtf_visitor->CDS_features,
                                                      0);
    /* XXX: to be done */

    /* show CDS features */
    for (i = 0; i < gt_array_size(gtf_visitor->CDS_features); i++) {
      fn = *(GtFeatureNode**) gt_array_get(gtf_visitor->CDS_features, i);
      gt_gff3_output_leading(fn, gtf_visitor->outfp);
      gt_file_xprintf(gtf_visitor->outfp, "gene_id \""GT_WU"\"; transcript_id "
                      "\""GT_WU"."GT_WU"\";\n", gtf_visitor->gene_id,
                      gtf_visitor->gene_id, gtf_visitor->transcript_id);
    }
    /* XXX: show stop_codon feature and shorten last CDS feature */
  }
  return had_err;
}
Esempio n. 24
0
static int gt_cluster_matches(GtClusteredSet *cs,
                              GtMatchEdgeTable *matchedgetab,
                              GtError *err)
{
  unsigned long i;
  for (i = 0; i < matchedgetab->num_of_edges; i++) {
     if (gt_clustered_set_merge_clusters(cs,
              ((GtMatchEdge*)(gt_array_get(matchedgetab->edges, i)))->matchnum0,
              ((GtMatchEdge*)(gt_array_get(matchedgetab->edges, i)))->matchnum1,
              err) != 0) {
       return -1;
    }
  }
  return 0;
}
Esempio n. 25
0
void agn_bron_kerbosch( GtArray *R, GtArray *P, GtArray *X, GtArray *cliques,
                        bool skipsimplecliques )
{
  gt_assert(R != NULL && P != NULL && X != NULL && cliques != NULL);

  if(gt_array_size(P) == 0 && gt_array_size(X) == 0)
  {
    if(skipsimplecliques == false || gt_array_size(R) != 1)
    {
      GtUword i;
      AgnTranscriptClique *clique = agn_transcript_clique_new();
      for(i = 0; i < gt_array_size(R); i++)
      {
        GtFeatureNode *transcript = *(GtFeatureNode **)gt_array_get(R, i);
        agn_transcript_clique_add(clique, transcript);
      }
      gt_array_add(cliques, clique);
    }
  }

  while(gt_array_size(P) > 0)
  {
    GtGenomeNode *v = *(GtGenomeNode **)gt_array_get(P, 0);

    // newR = R \union {v}
    GtArray *newR = agn_gt_array_copy(R, sizeof(GtGenomeNode *));
    gt_array_add(newR, v);
    // newP = P \intersect N(v)
    GtArray *newP = agn_feature_neighbors(v, P);
    // newX = X \intersect N(v)
    GtArray *newX = agn_feature_neighbors(v, X);

    // Recursive call
    // agn_bron_kerbosch(R \union {v}, P \intersect N(v), X \intersect N(X))
    agn_bron_kerbosch(newR, newP, newX, cliques, skipsimplecliques);

    // Delete temporary arrays just created
    gt_array_delete(newR);
    gt_array_delete(newP);
    gt_array_delete(newX);

    // P := P \ {v}
    gt_array_rem(P, 0);

    // X := X \union {v}
    gt_array_add(X, v);
  }
}
Esempio n. 26
0
static void split_cds_feature(GtFeatureNode *cds_feature, GtFeatureNode *fn)
{
  GtArray *parents;
  unsigned long i;
  gt_assert(cds_feature && fn);

  /* find parents */
  parents = find_cds_parents(cds_feature, fn);

  /* remove CDS feature */
  gt_feature_node_remove_leaf(fn, cds_feature);

  /* add CDS feature to all parents */
  for (i = 0; i < gt_array_size(parents); i++) {
    GtFeatureNode *parent = *(GtFeatureNode**) gt_array_get(parents, i);
    const char *id = gt_feature_node_get_attribute(parent, GT_GFF_ID);
    if (!i) {
      gt_feature_node_set_attribute(cds_feature, GT_GFF_PARENT, id);
      gt_feature_node_add_child(parent, cds_feature);
    }
    else {
      GtFeatureNode *new_cds = gt_feature_node_clone(cds_feature);
      gt_feature_node_set_attribute(new_cds, GT_GFF_PARENT, id);
      gt_feature_node_add_child(parent, new_cds);
      gt_genome_node_delete((GtGenomeNode*) cds_feature);
    }
  }

  gt_array_delete(parents);
}
Esempio n. 27
0
static void potentialintronspostpro(GtArray *intronstoprocess,
                                    unsigned long icdelta,
                                    unsigned long icminremintronlength)
{
  GtArray *originalintrons;
  GtRange potintron;
  unsigned long i, potintronlength,
       minintronlength = 2 * icdelta + icminremintronlength;

  originalintrons = gt_array_new(sizeof (GtRange));

  /* save all (potential) introns */
  gt_array_add_array(originalintrons, intronstoprocess);

  /* reset introns to process */
  gt_array_set_size(intronstoprocess, 0);

  /* store introns */
  for (i = 0; i < gt_array_size(originalintrons); i++) {
    potintron       = *(GtRange*) gt_array_get(originalintrons, i);
    potintronlength = potintron.end - potintron.start + 1;

    if (potintronlength >= minintronlength) {
      /* keep this intron (plus/minus intron deltas)
         that is, this intron is cut out later */
      potintron.start  += icdelta;
      potintron.end -= icdelta;
      gt_array_add(intronstoprocess, potintron);
    }
    /* else: skip this intron
       that is, this intron is not cut out later */
  }

  gt_array_delete(originalintrons);
}
static int seqid_info_get(SeqidInfo *seqid_info, GtUword *seqnum,
                          GtUword *filenum, GtRange *outrange,
                          const GtRange *inrange,
                          GT_UNUSED const char *filename,
                          const char *seqid, GtError *err)
{
  SeqidInfoElem *seqid_info_elem;
  GtUword i;
  gt_error_check(err);
  gt_assert(seqid_info && seqnum && outrange && inrange);
  for (i = 0; i < gt_array_size(seqid_info); i++) {
    seqid_info_elem = gt_array_get(seqid_info, i);
    if (seqid_info_elem->descrange.end == GT_UNDEF_UWORD ||
        gt_range_contains(&seqid_info_elem->descrange, inrange)) {
      *seqnum = seqid_info_elem->seqnum;
      *filenum = seqid_info_elem->filenum;
      *outrange = seqid_info_elem->descrange;
      return 0;
    }
  }
  gt_error_set(err,
               "cannot find a sequence with ID \"%s\" "
               "{range " GT_WU "," GT_WU ")",
               seqid, inrange->start, inrange->end);
  return -1;
}
Esempio n. 29
0
bool gt_ranges_are_consecutive(const GtArray *ranges)
{
  GtUword i;
  for (i = 0; i < gt_array_size(ranges); i++) {
    gt_assert(((GtRange*) gt_array_get(ranges, i))->start <=
              ((GtRange*) gt_array_get(ranges, i))->end);
    if (i) {
      /* check if ranges are consecutive */
      if (((GtRange*) gt_array_get(ranges, i-1))->end >=
          ((GtRange*) gt_array_get(ranges, i))->start) {
        return false;
      }
    }
  }
  return true;
}
Esempio n. 30
0
static GtArray* generic_ranges_uniq(GtArray *out_ranges,
                                    const GtArray *in_ranges, bool count)
{
  GtUword i, *ctr_ptr, ctr = 1;
  GtArray *count_array = NULL;
  GtRange cur  = { GT_UNDEF_UWORD, GT_UNDEF_UWORD },
        prev = { GT_UNDEF_UWORD, GT_UNDEF_UWORD };
  gt_assert(out_ranges && in_ranges);
  gt_assert(gt_ranges_are_sorted(in_ranges));
  if (count)
    count_array = gt_array_new(sizeof (GtUword));
  for (i = 0; i < gt_array_size(in_ranges); i++) {
    cur = *(GtRange*) gt_array_get(in_ranges, i);
    if (!i) {
      gt_array_add(out_ranges, cur);
      if (count)
        gt_array_add(count_array, ctr);
    }
    else {
      if (prev.start == cur.start && prev.end == cur.end) {
        if (count) {
          ctr_ptr = gt_array_get_last(count_array);
          (*ctr_ptr)++;
        }
      }
      else {
        gt_array_add(out_ranges, cur);
        if (count)
          gt_array_add(count_array, ctr);
      }
    }
    prev = cur;
  }
  return count_array;
}