Ejemplo n.º 1
0
/* Append one MISMATCH_WITH_2_GAPS_EOP edit operation to the edit operation
   array of <bp>. The assertions require a protein backtrace path. */
void gth_backtrace_path_add_mismatch_with_2_gaps(GthBacktracePath *bp)
{
  Editoperation eop;

  gt_assert(bp);
  gt_assert(bp->alphatype == PROTEIN_ALPHA);
  gt_assert(bp->max_identical_length == MAXIDENTICALLENGTH_PROTEIN);

  eop = MISMATCH_WITH_2_GAPS_EOP;
  gt_array_add(bp->editoperations, eop);
}
Ejemplo n.º 2
0
/* Append one DELETION_WITH_1_GAP_EOP edit operation to the edit operation
   array of <bp>. The assertions require a protein backtrace path. */
void gth_backtrace_path_add_deletion_with_1_gap(GthBacktracePath *bp)
{
  Editoperation eop;

  gt_assert(bp);
  gt_assert(bp->alphatype == PROTEIN_ALPHA);
  gt_assert(bp->max_identical_length == MAXIDENTICALLENGTH_PROTEIN);

  eop = DELETION_WITH_1_GAP_EOP;
  gt_array_add(bp->editoperations, eop);
}
Ejemplo n.º 3
0
void* gt_class_alloc(size_t size)
{
  void *c_class;
  if (!c_classes)
    c_classes = gt_array_new(sizeof (void*));
  c_class = gt_calloc(1, size);
  gt_array_add(c_classes, c_class);
  return c_class;
}
Ejemplo n.º 4
0
/* Record one more intron position in <bp>.
   If the last stored edit operation has DELETIONEOP in the bits outside the
   <max_identical_length> mask and a masked value strictly between 0 and
   <max_identical_length>, that operation is incremented in place; otherwise
   a new operation (DELETIONEOP + 1) is appended. */
void gth_backtrace_path_add_intron(GthBacktracePath *bp)
{
  Editoperation new_eop = DELETIONEOP + 1, *last;
  unsigned long kind, run_length;

  gt_assert(bp);
  gt_assert(bp->alphatype == DNA_ALPHA || bp->alphatype == PROTEIN_ALPHA);

  if (gt_array_size(bp->editoperations)) {
    last       = gt_array_get_last(bp->editoperations);
    kind       = *last & ~bp->max_identical_length;
    run_length = *last &  bp->max_identical_length;
    if (kind == DELETIONEOP && run_length > 0 &&
        run_length < bp->max_identical_length) {
      /* still room in the masked field: extend the existing operation */
      (*last)++;
      return;
    }
  }
  gt_array_add(bp->editoperations, new_eop);
}
Ejemplo n.º 5
0
/* Match callback: <info> is a Maxmatchselfinfo.
   The smaller of <pos1>/<pos2> is taken as database start, the larger as
   query start. A Substringmatch is appended to <results> only if the database
   start lies before <dblen> and the query start lies behind it; all other
   matches are ignored.
   Returns 0, or -1 if gt_encseq_sep2seqnum() yields <numofquerysequences>
   for the query position. */
static int storemaxmatchself(void *info,
                             GT_UNUSED const GtEncseq *encseq,
                             GtUword len,
                             GtUword pos1,
                             GtUword pos2,
                             GT_UNUSED GtError *err)
{
  Maxmatchselfinfo *mminfo = (Maxmatchselfinfo *) info;
  GtUword dbstart = (pos1 < pos2) ? pos1 : pos2,
          querystart = (pos1 < pos2) ? pos2 : pos1,
          pos;
  Substringmatch subm;

  if (!(dbstart < mminfo->dblen && mminfo->dblen < querystart))
  {
    return 0;
  }

  subm.len = len;
  subm.dbstart = dbstart;
  /* position relative to the part following dblen + 1 */
  pos = querystart - (mminfo->dblen + 1);
  if (mminfo->querymarkpos != NULL)
  {
    GtUword queryseqnum = gt_encseq_sep2seqnum(mminfo->querymarkpos,
                                               mminfo->numofquerysequences,
                                               mminfo->querylen,
                                               pos);
    if (queryseqnum == mminfo->numofquerysequences)
    {
      return -1;
    }
    subm.querystart = (queryseqnum == 0)
                        ? pos
                        : pos - (mminfo->querymarkpos[queryseqnum-1] + 1);
    subm.queryseqnum = (uint64_t) queryseqnum;
  } else
  {
    subm.queryseqnum = 0;
    subm.querystart = pos;
  }
  gt_array_add(mminfo->results,subm);
  return 0;
}
Ejemplo n.º 6
0
/* Iteration callback: append the genome node passed as <value> to the GtArray
   passed as <data>. <key> is ignored. Always returns 0. */
static int save_sequence_region(GT_UNUSED void *key, void *value, void *data,
                                GT_UNUSED GtError *err)
{
  GtArray *nodes = data;
  GtGenomeNode *sr = value;

  gt_error_check(err);
  gt_assert(sr && nodes);

  gt_array_add(nodes, sr);
  return 0;
}
Ejemplo n.º 7
0
/* Record one more match in <bp>.
   Unless <ensure_single_match> is set, the last stored edit operation is
   incremented in place when its bits outside the <max_identical_length> mask
   are 0 and its masked value lies strictly between 0 and
   <max_identical_length>. In every other case a new operation with value 1 is
   appended. */
void gth_backtrace_path_add_match(GthBacktracePath *bp,
                                  bool ensure_single_match)
{
  Editoperation new_eop = 1, *last;
  unsigned long kind, run_length;

  gt_assert(bp);
  gt_assert(bp->alphatype == DNA_ALPHA || bp->alphatype == PROTEIN_ALPHA);

  if (gt_array_size(bp->editoperations) && !ensure_single_match) {
    last       = gt_array_get_last(bp->editoperations);
    kind       = *last & ~bp->max_identical_length;
    run_length = *last &  bp->max_identical_length;
    if (kind == 0 && run_length > 0 &&
        run_length < bp->max_identical_length) {
      /* extend the existing match operation */
      (*last)++;
      return;
    }
  }
  gt_array_add(bp->editoperations, new_eop);
}
Ejemplo n.º 8
0
/* Append a DUMMY_EOP edit operation to <bp> and store its array index in
   <bp->dummy_index>. The assertions require a protein backtrace path whose
   dummy index is still undefined. */
void gth_backtrace_path_add_dummy(GthBacktracePath *bp)
{
  Editoperation eop = DUMMY_EOP;

  gt_assert(bp);
  gt_assert(bp->alphatype == PROTEIN_ALPHA);
  gt_assert(bp->max_identical_length == MAXIDENTICALLENGTH_PROTEIN);
  gt_assert(bp->dummy_index == GT_UNDEF_ULONG);

  /* the new element ends up at the current size of the array */
  bp->dummy_index = gt_array_size(bp->editoperations);
  gt_array_add(bp->editoperations, eop);
}
Ejemplo n.º 9
0
/* Append <spliced_alignment> to the alignments stored in <splice_form>.
   The assertions require that the alignment does not start before the splice
   form and that it has the same strand as the splice form. */
void gt_csa_splice_form_add_sa(GtCSASpliceForm *splice_form,
                               void *spliced_alignment)
{
  gt_assert(splice_form);
  gt_assert(csa_splice_form_start(splice_form) <=
            splice_form->get_genomic_range(spliced_alignment).start);
  gt_assert(gt_csa_splice_form_strand(splice_form) ==
            splice_form->get_strand(spliced_alignment));

  gt_array_add(splice_form->spliced_alignments, spliced_alignment);
}
Ejemplo n.º 10
0
/* Append a new entry holding private copies of <tag> and <value> to the
   content array of <obo_header>. */
static void obo_header_add(OBOHeader *obo_header,
                           const char *tag, const char *value)
{
  OBOHeaderEntry *new_entry;

  gt_assert(obo_header && tag && value);

  new_entry = gt_malloc(sizeof *new_entry);
  new_entry->value = gt_cstr_dup(value);
  new_entry->tag = gt_cstr_dup(tag);
  gt_array_add(obo_header->content, new_entry);
}
Ejemplo n.º 11
0
/* Iteration callback: <data> is a ToolIterationInfo. Appends a ToolEntry made
   of <name>, <tool> and a new reference to <ti_info->str> (as prefix) to
   <ti_info->arr>. */
static void add_tool_to_stack(const char *name, GtTool *tool, void *data)
{
  ToolEntry new_entry;
  ToolIterationInfo *iteration_info = data;

  gt_assert(name && tool && data);

  new_entry.prefix = gt_str_ref(iteration_info->str);
  new_entry.tool = tool;
  new_entry.name = name;
  gt_array_add(iteration_info->arr, new_entry);
}
Ejemplo n.º 12
0
/* Node stream "next" implementation of the LTR cluster stream.
   On the first call the complete input stream is consumed: a reference to
   every node is buffered in <lcs->nodes>, every node is passed to the
   prepare-seq visitor <lcs->lcv>, and afterwards process_feature() is run for
   each feature key reported by that visitor. From then on (including the
   remainder of the first call) the buffered nodes are handed out one per
   call via <gn>; <*gn> is set to NULL once all of them have been returned.
   An empty input stream therefore yields <*gn> == NULL on the first call
   instead of reading from the empty node buffer.
   Returns 0 on success and the error code of the failing step otherwise. */
static int gt_ltr_cluster_stream_next(GtNodeStream *ns,
                                      GtGenomeNode **gn,
                                      GtError *err)
{
  GtLTRClusterStream *lcs;
  GtGenomeNode *ref_gn;
  int had_err = 0;
  unsigned long i = 0;

  gt_error_check(err);
  lcs = gt_ltr_cluster_stream_cast(ns);
  if (lcs->first_next) {
    /* buffer and visit the whole input */
    while (!(had_err = gt_node_stream_next(lcs->in_stream, gn, err)) && *gn) {
      gt_assert(*gn && !had_err);
      ref_gn = gt_genome_node_ref(*gn);
      gt_array_add(lcs->nodes, ref_gn);
      had_err = gt_genome_node_accept(*gn, (GtNodeVisitor*) lcs->lcv, err);
      if (had_err) {
        /* drop the reference obtained from the input stream; the buffered
           reference stays in <lcs->nodes> */
        gt_genome_node_delete(*gn);
        *gn = NULL;
        break;
      }
    }
    lcs->feat_to_encseq =
                       gt_ltr_cluster_prepare_seq_visitor_get_encseqs(lcs->lcv);
    lcs->feat_to_encseq_keys =
                      gt_ltr_cluster_prepare_seq_visitor_get_features(lcs->lcv);
    if (!had_err) {
      for (i = 0; i < gt_str_array_size(lcs->feat_to_encseq_keys); i++) {
        had_err = process_feature(lcs,
                                  gt_str_array_get(lcs->feat_to_encseq_keys, i),
                                  err);
        if (had_err)
          break;
      }
    }
    if (had_err)
      return had_err;
    lcs->first_next = false;
  }

  /* hand out the next buffered node, or signal the end of the stream */
  if (lcs->next_index >= gt_array_size(lcs->nodes))
    *gn = NULL;
  else {
    *gn = *(GtGenomeNode**) gt_array_get(lcs->nodes, lcs->next_index);
    lcs->next_index++;
  }
  return 0;
}
Ejemplo n.º 13
0
/* Compute the parts of the exons of <genemodel> (asserted to be an mRNA) that
   are covered by <alignment>.
   Returns NULL if the two features are on different strands. Otherwise
   returns a newly created array of GtRange values holding every non-null
   intersection between an exon range and the range of a feature reached when
   iterating over <alignment> (features of type "match_gap" are skipped).
   The collected ranges are asserted to be pairwise non-overlapping. */
static GtArray*
gaeval_visitor_intersect(GtGenomeNode *genemodel, GtGenomeNode *alignment)
{
  agn_assert(genemodel && alignment);

  GtFeatureNode *genefn = gt_feature_node_cast(genemodel);
  GtFeatureNode *algnfn = gt_feature_node_cast(alignment);
  agn_assert(gt_feature_node_has_type(genefn, "mRNA"));
  GtStrand genestrand = gt_feature_node_get_strand(genefn);
  GtStrand algnstrand = gt_feature_node_get_strand(algnfn);
  if(genestrand != algnstrand)
    return NULL;

  GtArray *covered_parts = gt_array_new( sizeof(GtRange) );
  GtArray *exons = agn_typecheck_select(genefn, agn_typecheck_exon);
  /* value that marks "no intersection" in the comparison below */
  GtRange nullrange = {0, 0};
  /* unsigned index: gt_array_size() is compared against it directly */
  GtUword i;
  for(i = 0; i < gt_array_size(exons); i++)
  {
    GtGenomeNode *exon = *(GtGenomeNode **)gt_array_get(exons, i);
    GtRange exonrange = gt_genome_node_get_range(exon);

    GtFeatureNodeIterator *aniter = gt_feature_node_iterator_new(algnfn);
    GtFeatureNode *tempaln;
    for(tempaln  = gt_feature_node_iterator_next(aniter);
        tempaln != NULL;
        tempaln  = gt_feature_node_iterator_next(aniter))
    {
      if(gt_feature_node_has_type(tempaln, "match_gap"))
        continue;

      GtRange alnrange = gt_genome_node_get_range((GtGenomeNode *) tempaln);
      GtRange intr = gaeval_visitor_range_intersect(&exonrange, &alnrange);
      if(gt_range_compare(&intr, &nullrange) != 0)
        gt_array_add(covered_parts, intr);
    }
    gt_feature_node_iterator_delete(aniter);
  }
  gt_array_delete(exons);

  /* sanity check: no two collected ranges may overlap */
  for(i = 0; i < gt_array_size(covered_parts); i++)
  {
    GtRange *r1 = gt_array_get(covered_parts, i);
    GtUword j;
    for(j = i+1; j < gt_array_size(covered_parts); j++)
    {
      GtRange *r2 = gt_array_get(covered_parts, j);
      agn_assert(gt_range_overlap(r1, r2) == false);
    }
  }

  return covered_parts;
}
Ejemplo n.º 14
0
/* Write the exon structure of <transcript> to <outstream> as a location
   string: "<start..>end" for a single exon, "join(<s1..e1,...,sN..>eN)" for
   several exons, wrapped in "complement(...)" if the transcript is on the
   reverse strand. Exons are the direct children accepted by
   agn_gt_feature_node_is_exon_feature(), sorted with
   agn_gt_genome_node_compare(). At least one exon is required (asserted). */
void agn_transcript_structure_gbk(GtFeatureNode *transcript, FILE *outstream)
{
  gt_assert(transcript && outstream);

  /* collect the exon children; the array only borrows the node pointers */
  GtArray *exons = gt_array_new( sizeof(GtFeatureNode *) );
  GtFeatureNodeIterator *iter = gt_feature_node_iterator_new_direct(transcript);
  GtFeatureNode *child;
  for
  (
    child = gt_feature_node_iterator_next(iter);
    child != NULL;
    child = gt_feature_node_iterator_next(iter)
  )
  {
    if(agn_gt_feature_node_is_exon_feature(child))
      gt_array_add(exons, child);
  }
  gt_feature_node_iterator_delete(iter);

  gt_assert(gt_array_size(exons) > 0);
  gt_array_sort(exons, (GtCompare)agn_gt_genome_node_compare);

  bool reverse = gt_feature_node_get_strand(transcript) == GT_STRAND_REVERSE;
  if(reverse)
    fputs("complement(", outstream);

  if(gt_array_size(exons) == 1)
  {
    GtGenomeNode *exon = *(GtGenomeNode **)gt_array_get(exons, 0);
    GtRange exonrange = gt_genome_node_get_range(exon);
    fprintf(outstream, "<%lu..>%lu", exonrange.start, exonrange.end);
  }
  else
  {
    fputs("join(", outstream);
    GtUword i;
    for(i = 0; i < gt_array_size(exons); i++)
    {
      GtGenomeNode *exon = *(GtGenomeNode **)gt_array_get(exons, i);
      GtRange exonrange = gt_genome_node_get_range(exon);

      /* first exon gets the '<' marker, last exon the '>' marker */
      if(i == 0)
        fprintf(outstream, "<%lu..%lu", exonrange.start, exonrange.end);
      else if(i+1 == gt_array_size(exons))
        fprintf(outstream, ",%lu..>%lu", exonrange.start, exonrange.end);
      else
        fprintf(outstream, ",%lu..%lu", exonrange.start, exonrange.end);
    }
    fputs(")", outstream);
  }

  if(reverse)
    fputs(")", outstream);

  /* release the temporary exon list (previously leaked) */
  gt_array_delete(exons);
}
Ejemplo n.º 15
0
GtFeatureNodeIterator* gt_feature_node_iterator_new(const GtFeatureNode *fn)
{
  GtFeatureNodeIterator *fni;
  GtFeatureNode *child_feature;
  GtDlistelem *dlistelem;
  gt_assert(fn);
  fni = feature_node_iterator_new_base(fn);
  if (gt_feature_node_is_pseudo((GtFeatureNode*) fn)) {
    /* add the children backwards to traverse in order */
    for (dlistelem = gt_dlist_last(fn->children); dlistelem != NULL;
         dlistelem = gt_dlistelem_previous(dlistelem)) {
      child_feature = (GtFeatureNode*) gt_dlistelem_get_data(dlistelem);
      gt_array_add(fni->feature_stack, child_feature);
    }
  }
  else
    gt_array_add(fni->feature_stack, fni->fn);
  gt_assert(gt_array_size(fni->feature_stack));
  fni->direct = false;
  return fni;
}
Ejemplo n.º 16
0
/* Build a new array of GthJTMatch values from <fragments>.
   Without <enrichchains> exactly the fragments referenced by <chain> are
   stored. With <enrichchains> every one of the <num_of_fragments> fragments
   whose second-sequence range overlaps <genomicrange> is stored.
   For each stored match, positions 2 become the genomic range and positions 1
   the reference range. */
static GtArray* make_list_of_chain_fragments(GtChain *chain,
                                             GtFragment *fragments,
                                             unsigned long num_of_fragments,
                                             bool enrichchains,
                                             const GtRange *genomicrange)
{
  unsigned long idx;
  const GtFragment *frag;
  GthJTMatch match;
  GtArray *chain_fragments;

  gt_assert(chain && fragments && num_of_fragments);
  chain_fragments = gt_array_new(sizeof (GthJTMatch));

  if (enrichchains) {
    /* chain enrichment used -> store all fragments which overlap with genomic
       range of computed chain */
    GtRange fragmentrange;
    for (idx = 0; idx < num_of_fragments; idx++) {
      frag = fragments + idx;
      fragmentrange.start = frag->startpos2;
      fragmentrange.end   = frag->endpos2;
      if (!gt_range_overlap(genomicrange, &fragmentrange))
        continue;
      match.gen_range.start = frag->startpos2;
      match.gen_range.end   = frag->endpos2;
      match.ref_range.start = frag->startpos1;
      match.ref_range.end   = frag->endpos1;
      gt_array_add(chain_fragments, match);
    }
  }
  else {
    /* no chain enrichment used -> store all fragments from chain */
    for (idx = 0; idx < gt_chain_size(chain); idx++) {
      frag = fragments + gt_chain_get_fragnum(chain, idx);
      match.gen_range.start = frag->startpos2;
      match.gen_range.end   = frag->endpos2;
      match.ref_range.start = frag->startpos1;
      match.ref_range.end   = frag->endpos1;
      gt_array_add(chain_fragments, match);
    }
  }
  return chain_fragments;
}
Ejemplo n.º 17
0
/* Iteration callback over model hits: <key> is the model name, <val> a
   GtHMMERModelHit and <data> the GtLTRdigestPdomVisitor.
   The forward hits are used if gt_double_compare(best_fwd, best_rev) <= 0,
   the reverse hits otherwise. A single hit gets chain number 0 added to its
   <chains> array; several hits are turned into fragments, sorted and passed
   to gt_globalchaining_max(). In both cases every hit is then handed to
   gt_ltrdigest_pdom_visitor_attach_hit(). Always returns 0. */
static int gt_ltrdigest_pdom_visitor_process_hit(GT_UNUSED void *key, void *val,
                                                 void *data,
                                                 GT_UNUSED GtError *err)
{
  const char *mdl = (const char*) key;
  GtHMMERModelHit *mh = (GtHMMERModelHit*) val;
  GtLTRdigestPdomVisitor *lv = (GtLTRdigestPdomVisitor*) data;
  GtArray *hits;
  GtFragment *frags;
  GtUword nof_hits, i, chainno;

  hits = (gt_double_compare(mh->best_fwd, mh->best_rev) <= 0) ? mh->fwd_hits
                                                              : mh->rev_hits;
  gt_assert(hits);
  nof_hits = gt_array_size(hits);
  if (nof_hits == 0)
    return 0;

  if (nof_hits == 1UL) {
    /* nothing to chain: the only hit forms chain 0 */
    GtHMMERSingleHit *h = *(GtHMMERSingleHit**) gt_array_get(hits, 0);
    chainno = 0UL;
    gt_array_add(h->chains, chainno);
    (void) gt_ltrdigest_pdom_visitor_attach_hit(lv, mh, h);
    return 0;
  }

  /* several hits: build one fragment per hit and chain them */
  frags = gt_malloc((size_t) nof_hits * sizeof (GtFragment));
  for (i = 0; i < nof_hits; i++) {
    GtHMMERSingleHit *h = *(GtHMMERSingleHit**) gt_array_get(hits, i);
    gt_assert(h);
    frags[i].startpos1 = h->hmmfrom;
    frags[i].endpos1   = h->hmmto;
    frags[i].startpos2 = h->alifrom;
    frags[i].endpos2   = h->alito;
    frags[i].weight    = (GtWord) (h->alito - h->alifrom + 1) * h->score;
    frags[i].data      = h;
  }
  qsort(frags, (size_t) nof_hits, sizeof (GtFragment),
        gt_ltrdigest_pdom_visitor_fragcmp);
  gt_log_log("%s: chaining "GT_WU" frags", mdl, nof_hits);
  gt_globalchaining_max(frags, nof_hits,
                       (GtUword) lv->chain_max_gap_length,
                       gt_ltrdigest_pdom_visitor_chainproc, &chainno);
  gt_free(frags);

  for (i = 0; i < nof_hits; i++) {
    GtHMMERSingleHit *h = *(GtHMMERSingleHit**) gt_array_get(hits, i);
    (void) gt_ltrdigest_pdom_visitor_attach_hit(lv, mh, h);
  }
  return 0;
}
Ejemplo n.º 18
0
/* Check or infer the stop codon of the mRNA currently held by <v>.
   Does nothing if no CDS segments are stored. The expected stop codon range
   is the last three positions of the last CDS segment, or the first three
   positions of the first CDS segment on the reverse strand.
   - more than one explicit stop codon: a message is logged;
   - exactly one: a message is logged if its range differs from the expected
     range;
   - none: a "stop_codon" feature with the expected range is created, added as
     child of the mRNA and stored in <v->stops>. */
static void infer_cds_visitor_check_stop(AgnInferCDSVisitor *v)
{
  if(gt_array_size(v->cds) == 0)
    return;

  const char *mrnaid = gt_feature_node_get_attribute(v->mrna, "ID");
  unsigned int ln = gt_genome_node_get_line_number((GtGenomeNode *)v->mrna);
  GtStrand strand = gt_feature_node_get_strand(v->mrna);

  GtRange stoprange;
  GtUword threeprimeindex = gt_array_size(v->cds) - 1;
  GtGenomeNode **threeprimesegment = gt_array_get(v->cds, threeprimeindex);
  stoprange = gt_genome_node_get_range(*threeprimesegment);
  stoprange.start = stoprange.end - 2;
  if(strand == GT_STRAND_REVERSE)
  {
    threeprimesegment = gt_array_get(v->cds, 0);
    stoprange = gt_genome_node_get_range(*threeprimesegment);
    stoprange.end = stoprange.start + 2;
  }

  GtUword numstops = gt_array_size(v->stops);
  if(numstops > 1)
  {
    /* report the number of stop codons (not of start codons) */
    gt_logger_log(v->logger, "mRNA '%s' (line %u) has %lu stop codons", mrnaid,
                  ln, numstops);
  }
  else if(numstops == 1)
  {
    GtGenomeNode **codon = gt_array_get(v->stops, 0);
    GtRange testrange = gt_genome_node_get_range(*codon);
    if(gt_range_compare(&stoprange, &testrange) != 0)
    {
      gt_logger_log(v->logger, "stop codon inferred from CDS [%lu, %lu] does "
                    "not match explicitly provided stop codon [%lu, %lu] for "
                    "mRNA '%s'", stoprange.start, stoprange.end,
                    testrange.start, testrange.end, mrnaid);
    }
  }
  else /* no explicit stop codon: create one from the inferred range */
  {
    GtStr *seqid = gt_genome_node_get_seqid((GtGenomeNode *)v->mrna);
    GtGenomeNode *codonfeature = gt_feature_node_new(seqid, "stop_codon",
                                                     stoprange.start,
                                                     stoprange.end,
                                                     strand);
    if(v->source)
      gt_feature_node_set_source((GtFeatureNode *)codonfeature, v->source);
    GtFeatureNode *cf = (GtFeatureNode *)codonfeature;
    gt_feature_node_add_child(v->mrna, cf);
    gt_array_add(v->stops, cf);
  }
}
Ejemplo n.º 19
0
/* Infer CDS features for the mRNA currently held by <v>.
   Nothing is done if CDS segments already exist, or unless there is at least
   one exon and exactly one start codon and one stop codon. Otherwise, for
   every exon for which infer_cds_visitor_infer_range() reports a CDS range, a
   "CDS" feature with that range is created, added as child of the mRNA and
   stored in <v->cds>. */
static void infer_cds_visitor_infer_cds(AgnInferCDSVisitor *v)
{
  GtFeatureNode **start_codon, **stop_codon;
  GtRange left_codon_range, right_codon_range;
  GtUword exonidx;

  if(gt_array_size(v->cds) > 0)
    return;
  if(gt_array_size(v->exons) == 0)
    return;
  if(gt_array_size(v->starts) != 1 || gt_array_size(v->stops) != 1)
    return;

  start_codon = gt_array_get(v->starts, 0);
  stop_codon  = gt_array_get(v->stops,  0);
  if(start_codon == NULL || stop_codon == NULL)
    return;

  /* on the reverse strand the stop codon is the left-hand one */
  if(gt_feature_node_get_strand(v->mrna) == GT_STRAND_REVERSE)
  {
    left_codon_range  = gt_genome_node_get_range(*(GtGenomeNode **)stop_codon);
    right_codon_range = gt_genome_node_get_range(*(GtGenomeNode **)start_codon);
  }
  else
  {
    left_codon_range  = gt_genome_node_get_range(*(GtGenomeNode **)start_codon);
    right_codon_range = gt_genome_node_get_range(*(GtGenomeNode **)stop_codon);
  }

  for(exonidx = 0; exonidx < gt_array_size(v->exons); exonidx++)
  {
    GtFeatureNode *exon = *(GtFeatureNode **)gt_array_get(v->exons, exonidx);
    GtGenomeNode *exon_gn = (GtGenomeNode *)exon;
    GtRange exon_range = gt_genome_node_get_range(exon_gn);
    GtStrand exon_strand = gt_feature_node_get_strand(exon);
    GtRange cdsrange;
    GtGenomeNode *cdsfeat;

    if(!infer_cds_visitor_infer_range(&exon_range, &left_codon_range,
                                      &right_codon_range, &cdsrange))
    {
      continue;
    }

    cdsfeat = gt_feature_node_new(gt_genome_node_get_seqid(exon_gn), "CDS",
                                  cdsrange.start, cdsrange.end, exon_strand);
    if(v->source)
      gt_feature_node_set_source((GtFeatureNode *)cdsfeat, v->source);
    gt_feature_node_add_child(v->mrna, (GtFeatureNode *)cdsfeat);
    gt_array_add(v->cds, cdsfeat);
  }
}
Ejemplo n.º 20
0
/* Push all children of <fn> onto <feature_stack>, walking the child list
   from its last element to its first. */
static void add_children_to_stack(GtArray *feature_stack,
                                  const GtFeatureNode *fn)
{
  GtDlistelem *elem;
  GtFeatureNode *child_feature;

  gt_assert(feature_stack && fn && fn->children);

  /* add the children backwards to traverse in order */
  elem = gt_dlist_last(fn->children);
  while (elem != NULL) {
    child_feature = gt_dlistelem_get_data(elem);
    gt_array_add(feature_stack, child_feature);
    elem = gt_dlistelem_previous(elem);
  }
}
Ejemplo n.º 21
0
/* Append (end - start + 1) characters of <original_sequence> to the spliced
   sequence of <ss> and record every position from <start> to <end>
   (inclusive) in the position mapping. <start> must be greater than the last
   position recorded so far (asserted). */
void gt_splicedseq_add(Splicedseq *ss, unsigned long start, unsigned long end,
                       const char *original_sequence)
{
  unsigned long pos;

  gt_assert(ss && start <= end && original_sequence);
  gt_str_append_cstr_nt(ss->splicedseq, original_sequence,
                        end - start + 1);

  /* make sure elements are added in ascending order */
  gt_assert(!gt_array_size(ss->positionmapping) ||
            start > *(unsigned long*) gt_array_get_last(ss->positionmapping));

  pos = start;
  while (pos <= end) {
    gt_array_add(ss->positionmapping, pos);
    pos++;
  }
}
/* Create a new seqid info (an array of SeqidInfoElem) containing one element
   built from <seqnum>, <filenum> and a copy of <descrange>. */
static SeqidInfo* seqid_info_new(GtUword seqnum, GtUword filenum,
                                 const GtRange *descrange)
{
  GtArray *info;
  SeqidInfoElem first_elem;

  gt_assert(descrange);

  first_elem.descrange = *descrange;
  first_elem.filenum = filenum;
  first_elem.seqnum = seqnum;

  info = gt_array_new(sizeof (SeqidInfoElem));
  gt_array_add(info, first_elem);
  return info;
}
Ejemplo n.º 23
0
/* Consistency check: enumerate the special ranges of <encseq> once with the
   iterator flag set to true and once with it set to false, reverse the second
   list and compare both lists. The program exits with
   GT_EXIT_PROGRAMMING_ERROR if they differ. Returns 0 immediately if <encseq>
   has no special ranges. */
int checkspecialrangesfast(const Encodedsequence *encseq)
{
  GtArray *fwd_ranges, *bwd_ranges;
  Specialrangeiterator *iter;
  Sequencerange current;
  bool haserr = false;

  if (!hasspecialranges(encseq))
  {
    return 0;
  }
  fwd_ranges = gt_array_new(sizeof (Sequencerange));
  bwd_ranges = gt_array_new(sizeof (Sequencerange));

  for (iter = newspecialrangeiterator(encseq,true);
       nextspecialrangeiterator(&current,iter); /* Nothing */)
  {
    gt_array_add(fwd_ranges,current);
  }
  freespecialrangeiterator(&iter);

  for (iter = newspecialrangeiterator(encseq,false);
       nextspecialrangeiterator(&current,iter); /* Nothing */)
  {
    gt_array_add(bwd_ranges,current);
  }
  freespecialrangeiterator(&iter);

  /* bring the second list into the order of the first one */
  gt_array_reverse(bwd_ranges);
  if (!haserr &&
      array_compare(fwd_ranges,bwd_ranges,compareSequencerange) != 0)
  {
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }
  gt_array_delete(fwd_ranges);
  gt_array_delete(bwd_ranges);
  return haserr ? - 1 : 0;
}
Ejemplo n.º 24
0
/* Create a new splice form which stores the two accessor functions and
   contains <spliced_alignment> as its only alignment. */
GtCSASpliceForm* gt_csa_splice_form_new(void *spliced_alignment,
                                   GetGenomicRangeFunc get_genomic_range,
                                   GetStrandFunc get_strand)
{
  GtCSASpliceForm *splice_form;

  gt_assert(spliced_alignment && get_strand);

  splice_form = gt_malloc(sizeof (*splice_form));
  splice_form->get_strand = get_strand;
  splice_form->get_genomic_range = get_genomic_range;
  splice_form->spliced_alignments = gt_array_new(sizeof (void*));
  gt_array_add(splice_form->spliced_alignments, spliced_alignment);
  return splice_form;
}
Ejemplo n.º 25
0
/* Record one more "intron with 2 bases left" position in <bp> (protein
   backtrace paths only, see assertions).
   If the last stored edit operation has DELETION_WITH_2_GAPS_EOP in the bits
   outside the <max_identical_length> mask and a masked value strictly between
   0 and <max_identical_length>, it is incremented in place; otherwise a new
   operation (DELETION_WITH_2_GAPS_EOP + 1) is appended. */
void gth_backtrace_path_add_intron_with_2_bases_left(GthBacktracePath *bp)
{
  Editoperation new_eop = DELETION_WITH_2_GAPS_EOP + 1, *last;
  unsigned long kind, run_length;

  gt_assert(bp);
  gt_assert(bp->alphatype == PROTEIN_ALPHA);
  gt_assert(bp->max_identical_length == MAXIDENTICALLENGTH_PROTEIN);

  if (gt_array_size(bp->editoperations)) {
    last       = gt_array_get_last(bp->editoperations);
    kind       = *last & ~bp->max_identical_length;
    run_length = *last &  bp->max_identical_length;
    if (kind == DELETION_WITH_2_GAPS_EOP && run_length > 0 &&
        run_length < bp->max_identical_length) {
      /* extend the existing operation */
      (*last)++;
      return;
    }
  }
  gt_array_add(bp->editoperations, new_eop);
}
Ejemplo n.º 26
0
/* Return a new array of transcript cliques for the features in <feature_set>.
   Every feature is first added as a clique of its own. Unless the set holds
   exactly one feature, agn_bron_kerbosch() is then run on the whole set and
   given the same result array. */
GtArray* agn_enumerate_feature_cliques(GtArray *feature_set)
{
  GtArray *cliques = gt_array_new( sizeof(GtArray *) );
  GtUword numfeatures = gt_array_size(feature_set);
  GtUword idx;

  /* First add each transcript as a clique, even if it is not a maximal
     clique */
  for(idx = 0; idx < numfeatures; idx++)
  {
    GtFeatureNode *fn = *(GtFeatureNode **)gt_array_get(feature_set, idx);
    AgnTranscriptClique *clique = agn_transcript_clique_new();
    agn_transcript_clique_add(clique, fn);
    gt_array_add(cliques, clique);
  }

  if(numfeatures == 1)
    return cliques;

  /* Then use the Bron-Kerbosch algorithm to find all maximal cliques
     containing >1 transcript.
     Initial call: agn_bron_kerbosch(\emptyset, vertex_set, \emptyset ) */
  GtArray *R = gt_array_new( sizeof(GtGenomeNode *) );
  GtArray *P = agn_gt_array_copy(feature_set, sizeof(GtGenomeNode *));
  GtArray *X = gt_array_new( sizeof(GtGenomeNode *) );

  agn_bron_kerbosch(R, P, X, cliques, true);

  gt_array_delete(R);
  gt_array_delete(P);
  gt_array_delete(X);

  return cliques;
}
Ejemplo n.º 27
0
/* Append one GtRange per exon of <sa> to <ranges>; each range runs from the
   left to the right genomic exon border of the exon. */
void gth_sa_get_exons(const GthSA *sa, GtArray *ranges)
{
  GtUword exonidx = 0;

  gt_assert(sa && ranges);

  while (exonidx < gt_array_size(sa->exons)) {
    Exoninfo *info = gt_array_get(sa->exons, exonidx);
    GtRange exonrange;
    exonrange.start = info->leftgenomicexonborder;
    exonrange.end = info->rightgenomicexonborder;
    gt_array_add(ranges, exonrange);
    exonidx++;
  }
}
Ejemplo n.º 28
0
/* vang: read all schema entries from the file named on the command line and
   print them to stdout in the order in which their data types were read.
   Returns 1 on usage error or if the schema file cannot be opened, and 0
   otherwise. */
int main(int argc, char **argv)
{
  if(argc != 2)
  {
    fputs("usage: vang schema.file\n", stderr);
    return 1;
  }

  gt_lib_init();

  char *schemafile = argv[1];
  FILE *schema = fopen(schemafile, "r");
  if(schema == NULL)
  {
    fprintf(stderr, "error: unable to open schema file '%s'\n", schemafile);
    /* pair the gt_lib_init() above also on the error path */
    gt_lib_clean();
    return 1;
  }

  /* entry_datatypes remembers the order in which data types were read;
     entries maps a data type to its schema entry */
  GtArray *entry_datatypes = gt_array_new( sizeof(char *) );
  GtHashmap *entries = gt_hashmap_new( GT_HASH_STRING, NULL,
                                       (GtFree)vang_schema_entry_delete );
  VangSchemaEntry *entry;
  while( (entry = vang_schema_entry_next(schema)) != NULL )
  {
    char *datatype = (char *)vang_schema_entry_get_datatype(entry);
    VangSchemaEntry *testentry = gt_hashmap_get(entries, datatype);
    if(testentry != NULL)
    {
      fprintf( stderr, "warning: already have an entry for data type '%s'; "
               "replacing\n", datatype );
      /* NOTE(review): if the data type string is owned by the entry, the key
         stored in the hashmap and the pointer stored earlier in
         entry_datatypes may dangle after this delete; the data type is also
         listed twice in entry_datatypes -- confirm against the hashmap and
         VangSchemaEntry implementations */
      vang_schema_entry_delete(testentry);
    }
    gt_hashmap_add(entries, datatype, entry);
    gt_array_add(entry_datatypes, datatype);
  }
  /* the schema file is fully consumed; release the handle */
  fclose(schema);

  unsigned long i;
  for(i = 0; i < gt_array_size(entry_datatypes); i++)
  {
    const char *type = *(const char **)gt_array_get(entry_datatypes, i);
    VangSchemaEntry *entry = gt_hashmap_get(entries, type);
    vang_schema_entry_to_string(entry, stdout);
    puts("");
  }
  gt_array_delete(entry_datatypes);
  gt_hashmap_delete(entries);

  gt_lib_clean();

  return 0;
}
Ejemplo n.º 29
0
/* Append a new element created from <node> to <block>. If the block has no
   top-level feature yet, a new reference to <node> becomes its top-level
   feature. The block is marked as unsorted. */
void gt_block_insert_element(GtBlock *block, GtFeatureNode *node)
{
  GtElement *new_element;

  gt_assert(block && node);

  if (block->top_level_feature == NULL) {
    GtGenomeNode *node_ref = gt_genome_node_ref((GtGenomeNode*) node);
    block->top_level_feature = (GtFeatureNode*) node_ref;
  }

  /* invalidate sortedness flag because insertion of element at the end
     may break ordering */
  block->sorted = false;

  new_element = gt_element_new(node);
  gt_array_add(block->elements, new_element);
}
Ejemplo n.º 30
0
/* Append a new reference to every element of <b2> to the elements of <b1>.
   Afterwards the size of <b1> must equal the sum of both original sizes
   (asserted). */
void gt_block_merge(GtBlock *b1, GtBlock *b2)
{
  unsigned int GT_UNUSED merged_size, idx;

  gt_assert(b1 && b2);
  merged_size = gt_block_get_size(b1) + gt_block_get_size(b2);

  for (idx = 0; idx < gt_array_size(b2->elements); idx++)
  {
    GtElement **slot = (GtElement**) gt_array_get(b2->elements, idx);
    GtElement *elem = gt_element_ref(*slot);
    gt_assert(elem);
    gt_array_add(b1->elements, elem);
  }

  gt_assert(gt_block_get_size(b1) == merged_size);
}