Esempio n. 1
0
/*
 * Updates the statistics kept in the GtStatVisitor passed as <data> for the
 * feature node <fn>:
 *  - counts <fn> as a multi-feature if it is its own multi representative,
 *  - calls compute_source_statistics() when used sources are tracked,
 *  - calls compute_type_statistics(),
 *  - if an exon number or CDS length distribution is kept, traverses the
 *    direct children of <fn> with add_exon_or_cds_number() and adds the
 *    per-node totals (presumably accumulated by that callback) to the
 *    respective distribution when they are non-zero.
 * Always returns 0.
 */
static int compute_statistics(GtFeatureNode *fn, void *data, GtError *err)
{
    GtStatVisitor *visitor;
    GT_UNUSED int traversal_rval;

    gt_error_check(err);
    gt_assert(data);
    visitor = data;

    /* only the representative of a multi-feature is counted */
    if (gt_feature_node_is_multi(fn)) {
        if (gt_feature_node_get_multi_representative(fn) == fn)
            visitor->number_of_multi_features++;
    }

    if (visitor->used_sources)
        compute_source_statistics(fn, visitor->used_sources);
    compute_type_statistics(fn, visitor);

    /* nothing left to do unless one of the per-node distributions is kept */
    if (!visitor->exon_number_distribution &&
            !visitor->cds_length_distribution) {
        return 0;
    }

    /* reset the per-node accumulators before visiting the children */
    visitor->exon_number_for_distri = 0;
    visitor->cds_length_for_distri = 0;
    traversal_rval = gt_feature_node_traverse_direct_children(fn, visitor,
                     add_exon_or_cds_number,
                     err);
    gt_assert(!traversal_rval); /* add_exon_or_cds_number() is sane */

    if (visitor->exon_number_distribution) {
        if (visitor->exon_number_for_distri) {
            gt_disc_distri_add(visitor->exon_number_distribution,
                               visitor->exon_number_for_distri);
        }
    }
    if (visitor->cds_length_distribution) {
        if (visitor->cds_length_for_distri) {
            gt_disc_distri_add(visitor->cds_length_distribution,
                               visitor->cds_length_for_distri);
        }
    }
    return 0;
}
Esempio n. 2
0
File: stat.c Progetto: 9beckert/TIR
/*
 * Adds <data> to stat->sa_alignment_score_distribution. Does nothing unless
 * the stat->sa_stats flag is set.
 */
void gth_stat_add_to_sa_alignment_score_distri(GthStat *stat,
                                               unsigned long data)
{
  gt_assert(stat);
  if (!stat->sa_stats)
    return; /* statistics on this distribution are switched off */
  gt_disc_distri_add(stat->sa_alignment_score_distribution, data);
}
Esempio n. 3
0
/*
 * Converts the <numofmatches> matches in <storematchtab> into fragments,
 * written consecutively to <fragments>. The number of fragments written is
 * stored in <*num_of_fragments>; it never exceeds <numofmatches>, so
 * <fragments> needs room for at most that many entries.
 *
 * A match yields a fragment if both of the following hold:
 *  - <rare> is 0, or at most <rare> of the matches processed so far (the
 *    current one included) start at the same reference position;
 *  - it is the first match, or gth_matches_are_equal() reports it as
 *    different from its direct predecessor.
 *
 * The fragment weight is the absolute match score scaled by
 * <fragweightfactor>; the end positions are inclusive.
 */
static void gthinitfragments(GtFragment *fragments,
                             GtUword *num_of_fragments,
                             GthMatch *storematchtab,
                             GtUword numofmatches,
                             GtUword rare,
                             double fragweightfactor)
{
  GthMatch *mptr;
  GtFragment *fragmentptr;
  GtDiscDistri *startpointdistri = NULL;

  /* init number of fragments */
  *num_of_fragments = 0;

  /* start positions only need to be counted when the rare filter is active */
  if (rare)
    startpointdistri = gt_disc_distri_new();

  for (mptr = storematchtab, fragmentptr = fragments;
       mptr < storematchtab + numofmatches;
       mptr++) {
    /* count the current match first, so the comparison against <rare>
       below includes it */
    if (rare)
      gt_disc_distri_add(startpointdistri, mptr->Storepositionreference);
    if ((!rare ||
         gt_disc_distri_get(startpointdistri, mptr->Storepositionreference)
         <= rare) &&
        (mptr == storematchtab || /* is the first match */
         !gth_matches_are_equal(mptr, mptr-1))) { /* or differs from the
                                                     previous one */
      /* NOTE(review): abs() takes an int; if Storescore is wider than int
         labs()/llabs() would be required -- confirm the field type */
      fragmentptr->weight     = (GtWord) (fragweightfactor *
                                        (double) abs(mptr->Storescore));
      fragmentptr->startpos1  = mptr->Storepositionreference;
      fragmentptr->endpos1    = mptr->Storepositionreference
                                + mptr->Storelengthreference - 1;
      fragmentptr->startpos2  = mptr->Storepositiongenomic;
      fragmentptr->endpos2    = mptr->Storepositiongenomic
                                + mptr->Storelengthgenomic - 1;
      fragmentptr++;
      (*num_of_fragments)++;
    }
  }

  /* startpointdistri is still NULL here when <rare> is 0 */
  gt_disc_distri_delete(startpointdistri);

  gt_assert(*num_of_fragments <= numofmatches);
}
Esempio n. 4
0
/*
 * Updates the type-specific counters and distributions of <sv> for the
 * feature node <fn>. The feature types are checked in the order gene, mRNA,
 * exon, CDS, intron, LTR retrotransposon; only the first matching type is
 * handled. Distributions are only updated if they are present in <sv>.
 */
static void compute_type_statistics(GtFeatureNode *fn, GtStatVisitor *sv)
{
    GtRange node_range;
    gt_assert(fn && sv);

    if (gt_feature_node_has_type(fn, gt_ft_gene)) {
        sv->number_of_genes++;
        if (gt_feature_node_has_CDS(fn))
            sv->number_of_protein_coding_genes++;
        if (sv->gene_length_distribution) {
            node_range = gt_genome_node_get_range((GtGenomeNode*) fn);
            gt_disc_distri_add(sv->gene_length_distribution,
                               gt_range_length(&node_range));
        }
        if (sv->gene_score_distribution) {
            /* the score is scaled by 100 before being added */
            gt_disc_distri_add(sv->gene_score_distribution,
                               gt_feature_node_get_score(fn) * 100.0);
        }
        return;
    }

    if (gt_feature_node_has_type(fn, gt_ft_mRNA)) {
        sv->number_of_mRNAs++;
        if (gt_feature_node_has_CDS(fn))
            sv->number_of_protein_coding_mRNAs++;
        return;
    }

    if (gt_feature_node_has_type(fn, gt_ft_exon)) {
        sv->number_of_exons++;
        if (sv->exon_length_distribution) {
            node_range = gt_genome_node_get_range((GtGenomeNode*) fn);
            gt_disc_distri_add(sv->exon_length_distribution,
                               gt_range_length(&node_range));
        }
        return;
    }

    if (gt_feature_node_has_type(fn, gt_ft_CDS)) {
        sv->number_of_CDSs++;
        return;
    }

    if (gt_feature_node_has_type(fn, gt_ft_intron)) {
        /* introns are not counted, only their lengths are recorded */
        if (sv->intron_length_distribution) {
            node_range = gt_genome_node_get_range((GtGenomeNode*) fn);
            gt_disc_distri_add(sv->intron_length_distribution,
                               gt_range_length(&node_range));
        }
        return;
    }

    if (gt_feature_node_has_type(fn, gt_ft_LTR_retrotransposon))
        sv->number_of_LTR_retrotransposons++;
}
/*
 * Returns a newly created distribution holding the <len> value of each of
 * the condenseq->ldb_nelems entries in condenseq->links. The distribution is
 * created here with gt_disc_distri_new() and handed to the caller.
 */
GtDiscDistri *gt_condenseq_link_length_dist(const GtCondenseq *condenseq)
{
  GtUword link_no = 0;
  GtDiscDistri *length_distri = gt_disc_distri_new();

  while (link_no < condenseq->ldb_nelems) {
    gt_disc_distri_add(length_distri, condenseq->links[link_no].len);
    link_no++;
  }
  return length_distri;
}
Esempio n. 6
0
/*
 * Prints the header line "sequence length distribution:" followed by the
 * distribution of the lengths of all sequences in <bs> to <outfp>. The
 * distribution is built temporarily and deleted before returning.
 */
void gt_bioseq_show_seqlengthdistri(GtBioseq *bs, GtFile *outfp)
{
  GtDiscDistri *length_distri;
  GtUword seqnum = 0;

  gt_assert(bs);
  length_distri = gt_disc_distri_new();

  /* collect one length per sequence */
  while (seqnum < gt_bioseq_number_of_sequences(bs)) {
    gt_disc_distri_add(length_distri,
                       gt_bioseq_get_sequence_length(bs, seqnum));
    seqnum++;
  }

  gt_file_xprintf(outfp, "sequence length distribution:\n");
  gt_disc_distri_show(length_distri, outfp);
  gt_disc_distri_delete(length_distri);
}
Esempio n. 7
0
/*
 * Adds the genomic length of every exon of <sa> to <exondistribution>. The
 * length is computed from the inclusive left and right genomic exon borders.
 */
static void addSAtoexondistribution(GtDiscDistri *exondistribution,
                                    GthSA *sa)
{
  unsigned long exon_no = 0;

  while (exon_no < gth_sa_num_of_exons(sa)) {
    Exoninfo *exon = gth_sa_get_exon(sa, exon_no);

    /* both borders belong to the exon, hence the + 1 */
    gt_disc_distri_add(exondistribution, exon->rightgenomicexonborder -
                                         exon->leftgenomicexonborder + 1);
    exon_no++;
  }
}
Esempio n. 8
0
/*
 * Adds the length of every intron of <sa> to <introndistribution>. Intron i
 * lies between exon i and exon i+1; its length is the number of positions
 * strictly between the right border of exon i and the left border of
 * exon i+1.
 */
static void addSAtointrondistribution(GtDiscDistri *introndistribution,
                                      GthSA *sa)
{
  unsigned long intron = 0;

  while (intron < gth_sa_num_of_introns(sa)) {
    gt_disc_distri_add(introndistribution,
                       gth_sa_get_exon(sa, intron + 1)->leftgenomicexonborder
                       - gth_sa_get_exon(sa, intron)->rightgenomicexonborder
                       - 1);
    intron++;
  }
}
/*
 * Returns a newly created distribution with one value per entry of
 * condenseq->links: the size of the link's editscript relative to the
 * editscript's target length, as a percentage truncated to an integer.
 * The distribution is created here with gt_disc_distri_new() and handed to
 * the caller.
 */
GtDiscDistri *gt_condenseq_link_comp_dist(const GtCondenseq *condenseq)
{
  GtUword link_no;
  GtDiscDistri *ratio_distri = gt_disc_distri_new();

  for (link_no = 0; link_no < condenseq->ldb_nelems; link_no++) {
    GtEditscript *script = condenseq->links[link_no].editscript;
    GtUword target_len;
    size_t script_size;

    target_len = gt_editscript_get_target_len(script);
    script_size = gt_editscript_size(script);
    /* NOTE(review): a target length of 0 would divide by zero here --
       presumably links always have a non-empty target; confirm */
    gt_disc_distri_add(ratio_distri,
                       (GtUword) ((double) script_size /
                                  (double) target_len * 100));
  }
  return ratio_distri;
}
Esempio n. 10
0
/*
 * Unit test for GtDiscDistri: checks that a new distribution reports 0 for
 * the keys 0 and 100, then adds key 0 once and key 100 with a count of 256
 * and checks that gt_disc_distri_get() returns exactly these counts.
 * Returns the value of <had_err> as maintained by the ensure() checks.
 */
int gt_disc_distri_unit_test(GtError *err)
{
  int had_err = 0;
  GtDiscDistri *distri;

  gt_error_check(err);

  distri = gt_disc_distri_new();

  /* nothing has been added yet */
  ensure(had_err, gt_disc_distri_get(distri, 0) == 0);
  ensure(had_err, gt_disc_distri_get(distri, 100) == 0);

  /* single add and multi add */
  if (!had_err) {
    gt_disc_distri_add(distri, 0);
    gt_disc_distri_add_multi(distri, 100, 256);
  }
  ensure(had_err, gt_disc_distri_get(distri, 0) == 1);
  ensure(had_err, gt_disc_distri_get(distri, 100) == 256);

  gt_disc_distri_delete(distri);

  return had_err;
}
Esempio n. 11
0
/*
 * Outputs the first <width> lcp values of lcpsubtab->tableoflcpvalues via
 * lcpsubtab->lcp2file.
 *
 * Each value below LCPOVERFLOW is stored directly in the smalllcpvalues
 * buffer. Every other value is recorded as a (position, value) pair in the
 * large-value buffer, with position = <posoffset> + index, and the marker
 * LCPOVERFLOW is stored in smalllcpvalues instead.
 *
 * Along the way maxbranchdepth, lcptabsum and (if present) distlcpvalues
 * are updated. Finally the small values are passed to outsmalllcpvalues()
 * and the collected large values, if any, are written to outfpllvtab.
 */
static void outlcpvalues(Lcpsubtab *lcpsubtab,
                         GtUword width,
                         GtUword posoffset)
{
  GtUword idx, lcpvalue;
  Largelcpvalue *largelcpvalueptr;

  gt_assert(lcpsubtab != NULL && lcpsubtab->lcp2file != NULL);
  /* the large-value buffer is refilled from its start on every call */
  lcpsubtab->lcp2file->largelcpvalues.nextfreeLargelcpvalue = 0;
  /* grow the large-value buffer to numoflargelcpvalues entries when the
     current allocation is not larger than that count; presumably
     numoflargelcpvalues bounds the number of large values stored below
     (the assertion in the loop checks the buffer is not overrun) */
  if (lcpsubtab->tableoflcpvalues.numoflargelcpvalues > 0 &&
      lcpsubtab->tableoflcpvalues.numoflargelcpvalues >=
      lcpsubtab->lcp2file->largelcpvalues.allocatedLargelcpvalue)
  {
    lcpsubtab->lcp2file->largelcpvalues.spaceLargelcpvalue
      = gt_realloc(lcpsubtab->lcp2file->largelcpvalues.spaceLargelcpvalue,
                   sizeof (*lcpsubtab->lcp2file->largelcpvalues.
                           spaceLargelcpvalue) *
                   lcpsubtab->tableoflcpvalues.numoflargelcpvalues);
    lcpsubtab->lcp2file->largelcpvalues.allocatedLargelcpvalue
      = lcpsubtab->tableoflcpvalues.numoflargelcpvalues;
  }
  for (idx=0; idx<width; idx++)
  {
    lcpvalue = gt_lcptab_getvalue(&lcpsubtab->tableoflcpvalues,0,idx);
    /* keep track of the largest lcp value seen so far */
    if (lcpsubtab->lcp2file->maxbranchdepth < lcpvalue)
    {
      lcpsubtab->lcp2file->maxbranchdepth = lcpvalue;
    }
    if (lcpvalue < (GtUword) LCPOVERFLOW)
    {
      /* small value: stored directly as a single byte */
      lcpsubtab->lcp2file->smalllcpvalues[idx] = (uint8_t) lcpvalue;
    } else
    {
      /* large value: append a (position, value) pair to the large-value
         buffer and store the overflow marker in the small table */
      gt_assert(lcpsubtab->lcp2file->largelcpvalues.nextfreeLargelcpvalue
                < lcpsubtab->lcp2file->largelcpvalues.
                                             allocatedLargelcpvalue);
      largelcpvalueptr
        = lcpsubtab->lcp2file->largelcpvalues.spaceLargelcpvalue +
          lcpsubtab->lcp2file->largelcpvalues.nextfreeLargelcpvalue++;
      largelcpvalueptr->position = posoffset + idx;
      largelcpvalueptr->value = lcpvalue;
      lcpsubtab->lcp2file->smalllcpvalues[idx] = LCPOVERFLOW;
    }
    lcpsubtab->lcptabsum += (double) lcpvalue;
    /* the distribution of lcp values is optional */
    if (lcpsubtab->distlcpvalues != NULL)
    {
      gt_disc_distri_add(lcpsubtab->distlcpvalues, lcpvalue);
    }
  }
  outsmalllcpvalues(lcpsubtab->lcp2file,width);
  /* write the large values collected in this call, if there are any */
  if (lcpsubtab->lcp2file->largelcpvalues.nextfreeLargelcpvalue > 0)
  {
    lcpsubtab->lcp2file->totalnumoflargelcpvalues
      += lcpsubtab->lcp2file->largelcpvalues.nextfreeLargelcpvalue;
    gt_assert(lcpsubtab->lcp2file->outfpllvtab != NULL);
    gt_xfwrite(lcpsubtab->lcp2file->largelcpvalues.spaceLargelcpvalue,
               sizeof (*lcpsubtab->lcp2file->largelcpvalues.
                                   spaceLargelcpvalue),
               (size_t) lcpsubtab->lcp2file->largelcpvalues.
                                   nextfreeLargelcpvalue,
               lcpsubtab->lcp2file->outfpllvtab);
  }
}
Esempio n. 12
0
File: stat.c Progetto: 9beckert/TIR
/*
 * Adds <data> to stat->refseqcoveragedistribution. Does nothing unless the
 * stat->refseqcovdistri flag is set.
 */
void gth_stat_add_to_refseqcovdistri(GthStat *stat, unsigned long data)
{
  gt_assert(stat);
  if (!stat->refseqcovdistri)
    return; /* this distribution is switched off */
  gt_disc_distri_add(stat->refseqcoveragedistribution, data);
}
Esempio n. 13
0
File: stat.c Progetto: 9beckert/TIR
/*
 * Adds <data> to stat->matchnumdistribution. Does nothing unless the
 * stat->matchnumdistri flag is set.
 */
void gth_stat_add_to_matchnumdistri(GthStat *stat, unsigned long data)
{
  gt_assert(stat);
  if (!stat->matchnumdistri)
    return; /* this distribution is switched off */
  gt_disc_distri_add(stat->matchnumdistribution, data);
}
Esempio n. 14
0
/*
 * Adds <data> to stat->sa_coverage_distribution. Does nothing unless the
 * stat->sa_stats flag is set.
 */
void gth_stat_add_to_sa_coverage_distri(GthStat *stat, GtUword data)
{
  gt_assert(stat);
  if (!stat->sa_stats)
    return; /* statistics on this distribution are switched off */
  gt_disc_distri_add(stat->sa_coverage_distribution, data);
}
Esempio n. 15
0
/*
 * Adds <data> to stat->matchnumdistribution. Does nothing unless the
 * stat->matchnumdistri flag is set.
 */
void gth_stat_add_to_matchnumdistri(GthStat *stat, GtUword data)
{
  gt_assert(stat);
  if (!stat->matchnumdistri)
    return; /* this distribution is switched off */
  gt_disc_distri_add(stat->matchnumdistribution, data);
}