/*
 * Traversal callback which updates the counters and distributions of a
 * GtStatVisitor for the feature node <fn>.
 *
 * <fn>   feature node to account for
 * <data> must be non-NULL (asserted); it is used as a GtStatVisitor*
 * <err>  error object, validated with gt_error_check() and handed to the
 *        traversal of the direct children
 *
 * Always returns 0.
 */
static int compute_statistics(GtFeatureNode *fn, void *data, GtError *err)
{
  GtStatVisitor *sv;
  GT_UNUSED int rval;

  gt_error_check(err);
  gt_assert(data);
  sv = (GtStatVisitor*) data;

  /* a multi-feature is counted once, namely at its representative */
  if (gt_feature_node_is_multi(fn)) {
    if (gt_feature_node_get_multi_representative(fn) == fn)
      sv->number_of_multi_features++;
  }

  if (sv->used_sources)
    compute_source_statistics(fn, sv->used_sources);

  compute_type_statistics(fn, sv);

  /* nothing more to do unless one of the per-node distributions is kept */
  if (!sv->exon_number_distribution && !sv->cds_length_distribution)
    return 0;

  /* reset the accumulators filled by add_exon_or_cds_number() */
  sv->exon_number_for_distri = 0;
  sv->cds_length_for_distri = 0;
  rval = gt_feature_node_traverse_direct_children(fn, sv,
                                                  add_exon_or_cds_number,
                                                  err);
  gt_assert(!rval); /* add_exon_or_cds_number() is sane */

  /* accumulators which stayed at zero are not recorded */
  if (sv->exon_number_distribution && sv->exon_number_for_distri) {
    gt_disc_distri_add(sv->exon_number_distribution,
                       sv->exon_number_for_distri);
  }
  if (sv->cds_length_distribution && sv->cds_length_for_distri) {
    gt_disc_distri_add(sv->cds_length_distribution,
                       sv->cds_length_for_distri);
  }

  return 0;
}
/*
 * Adds <data> to stat->sa_alignment_score_distribution, but only if
 * stat->sa_stats is set; otherwise nothing happens.
 * <stat> must not be NULL (asserted).
 */
void gth_stat_add_to_sa_alignment_score_distri(GthStat *stat,
                                               unsigned long data)
{
  gt_assert(stat);
  if (!stat->sa_stats)
    return;
  gt_disc_distri_add(stat->sa_alignment_score_distribution, data);
}
/*
 * Fills <fragments> from the matches in <storematchtab>.
 *
 * <fragments>        output array; at most <numofmatches> entries are written
 * <num_of_fragments> output; set to the number of entries actually written
 * <storematchtab>    input matches, <numofmatches> elements
 * <rare>             if non-zero, a match is skipped once more than <rare>
 *                    matches (including itself) with the same
 *                    Storepositionreference have been seen so far
 * <fragweightfactor> factor applied to the absolute match score to obtain
 *                    the fragment weight
 *
 * A match which gth_matches_are_equal() reports as equal to its direct
 * predecessor in <storematchtab> is skipped as well.
 */
static void gthinitfragments(GtFragment *fragments,
                             GtUword *num_of_fragments,
                             GthMatch *storematchtab,
                             GtUword numofmatches,
                             GtUword rare,
                             double fragweightfactor)
{
  GthMatch *mptr;
  GtFragment *fragmentptr;
  GtDiscDistri *startpointdistri = NULL;

  /* init number of fragments */
  *num_of_fragments = 0;

  /* the start point distribution is only needed for the <rare> filter */
  if (rare)
    startpointdistri = gt_disc_distri_new();

  for (mptr = storematchtab, fragmentptr = fragments;
       mptr < storematchtab + numofmatches;
       mptr++) {
    /* every match is counted, even if it is filtered out below */
    if (rare)
      gt_disc_distri_add(startpointdistri, mptr->Storepositionreference);

    if ((!rare ||
         gt_disc_distri_get(startpointdistri, mptr->Storepositionreference)
         <= rare) &&
        (mptr == storematchtab ||                /* is the first match */
         !gth_matches_are_equal(mptr, mptr-1))) { /* or is different from
                                                     last one */
      /* NOTE(review): abs() takes an int; if Storescore is wider than int
         the value would be truncated here -- confirm the field type */
      fragmentptr->weight = (GtWord) (fragweightfactor *
                                      (double) abs(mptr->Storescore));
      fragmentptr->startpos1 = mptr->Storepositionreference;
      fragmentptr->endpos1 = mptr->Storepositionreference +
                             mptr->Storelengthreference - 1;
      fragmentptr->startpos2 = mptr->Storepositiongenomic;
      fragmentptr->endpos2 = mptr->Storepositiongenomic +
                             mptr->Storelengthgenomic - 1;
      fragmentptr++;
      (*num_of_fragments)++;
    }
  }

  gt_disc_distri_delete(startpointdistri);

  gt_assert(*num_of_fragments <= numofmatches);
}
/*
 * Updates the type-specific counters and length/score distributions of <sv>
 * for the feature node <fn>. Both arguments must be non-NULL (asserted).
 *
 * The node type is tested in the order gene, mRNA, exon, CDS, intron,
 * LTR_retrotransposon; only the first matching type is handled. A
 * distribution is only updated if its pointer in <sv> is set.
 */
static void compute_type_statistics(GtFeatureNode *fn, GtStatVisitor *sv)
{
  GtRange range;

  gt_assert(fn && sv);

  if (gt_feature_node_has_type(fn, gt_ft_gene)) {
    sv->number_of_genes++;
    if (gt_feature_node_has_CDS(fn))
      sv->number_of_protein_coding_genes++;
    if (sv->gene_length_distribution) {
      range = gt_genome_node_get_range((GtGenomeNode*) fn);
      gt_disc_distri_add(sv->gene_length_distribution,
                         gt_range_length(&range));
    }
    if (sv->gene_score_distribution) {
      /* the score is multiplied by 100 before it is recorded */
      gt_disc_distri_add(sv->gene_score_distribution,
                         gt_feature_node_get_score(fn) * 100.0);
    }
    return;
  }

  if (gt_feature_node_has_type(fn, gt_ft_mRNA)) {
    sv->number_of_mRNAs++;
    if (gt_feature_node_has_CDS(fn))
      sv->number_of_protein_coding_mRNAs++;
    return;
  }

  if (gt_feature_node_has_type(fn, gt_ft_exon)) {
    sv->number_of_exons++;
    if (sv->exon_length_distribution) {
      range = gt_genome_node_get_range((GtGenomeNode*) fn);
      gt_disc_distri_add(sv->exon_length_distribution,
                         gt_range_length(&range));
    }
    return;
  }

  if (gt_feature_node_has_type(fn, gt_ft_CDS)) {
    sv->number_of_CDSs++;
    return;
  }

  if (gt_feature_node_has_type(fn, gt_ft_intron)) {
    /* introns are not counted, only their length is recorded */
    if (sv->intron_length_distribution) {
      range = gt_genome_node_get_range((GtGenomeNode*) fn);
      gt_disc_distri_add(sv->intron_length_distribution,
                         gt_range_length(&range));
    }
    return;
  }

  if (gt_feature_node_has_type(fn, gt_ft_LTR_retrotransposon))
    sv->number_of_LTR_retrotransposons++;
}
/*
 * Returns a newly created distribution to which the <len> member of each of
 * the first condenseq->ldb_nelems entries of condenseq->links was added.
 * The distribution is created here with gt_disc_distri_new() and handed to
 * the caller.
 */
GtDiscDistri *gt_condenseq_link_length_dist(const GtCondenseq *condenseq)
{
  GtDiscDistri *res = gt_disc_distri_new();
  GtUword idx = 0;

  while (idx < condenseq->ldb_nelems) {
    gt_disc_distri_add(res, condenseq->links[idx].len);
    idx++;
  }

  return res;
}
/*
 * Collects the lengths of all sequences in <bs> in a temporary distribution
 * and shows it on <outfp>, preceded by the line
 * "sequence length distribution:". <bs> must not be NULL (asserted).
 */
void gt_bioseq_show_seqlengthdistri(GtBioseq *bs, GtFile *outfp)
{
  GtDiscDistri *lengths;
  GtUword seqnum = 0;

  gt_assert(bs);

  lengths = gt_disc_distri_new();
  while (seqnum < gt_bioseq_number_of_sequences(bs)) {
    gt_disc_distri_add(lengths, gt_bioseq_get_sequence_length(bs, seqnum));
    seqnum++;
  }

  gt_file_xprintf(outfp, "sequence length distribution:\n");
  gt_disc_distri_show(lengths, outfp);

  /* the distribution is only needed for the output above */
  gt_disc_distri_delete(lengths);
}
/*
 * Adds, for every exon of <sa>, the value
 * rightgenomicexonborder - leftgenomicexonborder + 1
 * to <exondistribution>.
 */
static void addSAtoexondistribution(GtDiscDistri *exondistribution,
                                    GthSA *sa)
{
  unsigned long exonnum = 0;

  while (exonnum < gth_sa_num_of_exons(sa)) {
    Exoninfo *exon = gth_sa_get_exon(sa, exonnum);

    gt_disc_distri_add(exondistribution,
                       exon->rightgenomicexonborder -
                       exon->leftgenomicexonborder + 1);
    exonnum++;
  }
}
/*
 * Adds, for every intron of <sa>, the distance between the two exons
 * enclosing it to <introndistribution>: the left genomic border of exon
 * i+1 minus the right genomic border of exon i, minus 1.
 */
static void addSAtointrondistribution(GtDiscDistri *introndistribution,
                                      GthSA *sa)
{
  unsigned long intronnum = 0;

  while (intronnum < gth_sa_num_of_introns(sa)) {
    gt_disc_distri_add(introndistribution,
                       gth_sa_get_exon(sa, intronnum + 1)
                         ->leftgenomicexonborder -
                       gth_sa_get_exon(sa, intronnum)
                         ->rightgenomicexonborder - 1);
    intronnum++;
  }
}
GtDiscDistri *gt_condenseq_link_comp_dist(const GtCondenseq *condenseq) { GtUword idx; GtDiscDistri *res = gt_disc_distri_new(); for (idx = 0; idx < condenseq->ldb_nelems; idx++) { GtEditscript *es = condenseq->links[idx].editscript; GtUword vlen; size_t size; vlen = gt_editscript_get_target_len(es); size = gt_editscript_size(es); gt_disc_distri_add(res, (GtUword) ((double) size/(double) vlen * 100)); } return res; }
/*
 * Unit test for GtDiscDistri: checks gt_disc_distri_get() on a new
 * distribution and again after gt_disc_distri_add() and
 * gt_disc_distri_add_multi() were used.
 * Returns the value of <had_err>, which starts out as 0 and is passed to
 * every ensure() check.
 */
int gt_disc_distri_unit_test(GtError *err)
{
  int had_err = 0;
  GtDiscDistri *d;

  gt_error_check(err);

  d = gt_disc_distri_new();

  /* nothing was added yet: both keys are expected to yield 0 */
  ensure(had_err, gt_disc_distri_get(d, 0) == 0);
  ensure(had_err, gt_disc_distri_get(d, 100) == 0);

  if (had_err == 0) {
    gt_disc_distri_add(d, 0);             /* key 0 once */
    gt_disc_distri_add_multi(d, 100, 256); /* key 100 with count 256 */
  }

  /* the counts are expected to reflect the additions above */
  ensure(had_err, gt_disc_distri_get(d, 0) == 1);
  ensure(had_err, gt_disc_distri_get(d, 100) == 256);

  gt_disc_distri_delete(d);

  return had_err;
}
/*
 * Processes the first <width> lcp values of lcpsubtab->tableoflcpvalues and
 * writes them out via lcpsubtab->lcp2file.
 *
 * Values smaller than LCPOVERFLOW are stored directly in the smalllcpvalues
 * buffer. Every other value is stored as a (position, value) pair in the
 * largelcpvalues buffer, with position <posoffset> + index, and the
 * corresponding smalllcpvalues entry is set to LCPOVERFLOW.
 *
 * Along the way maxbranchdepth, lcptabsum and (if present) distlcpvalues
 * are updated. Finally the small values are passed to outsmalllcpvalues()
 * and the collected large values, if any, are written to outfpllvtab.
 *
 * <lcpsubtab> and lcpsubtab->lcp2file must not be NULL (asserted).
 */
static void outlcpvalues(Lcpsubtab *lcpsubtab, GtUword width,
                         GtUword posoffset)
{
  GtUword idx = 0, lcpvalue;
  Largelcpvalue *largelcpvalueptr;

  gt_assert(lcpsubtab != NULL && lcpsubtab->lcp2file != NULL);

  /* start with an empty buffer of large values */
  lcpsubtab->lcp2file->largelcpvalues.nextfreeLargelcpvalue = 0;

  /* enlarge the buffer if it cannot hold all expected large values */
  if (lcpsubtab->tableoflcpvalues.numoflargelcpvalues > 0)
  {
    if (lcpsubtab->tableoflcpvalues.numoflargelcpvalues >=
        lcpsubtab->lcp2file->largelcpvalues.allocatedLargelcpvalue)
    {
      lcpsubtab->lcp2file->largelcpvalues.spaceLargelcpvalue
        = gt_realloc(lcpsubtab->lcp2file->largelcpvalues.spaceLargelcpvalue,
                     sizeof (*lcpsubtab->lcp2file->largelcpvalues.
                             spaceLargelcpvalue) *
                     lcpsubtab->tableoflcpvalues.numoflargelcpvalues);
      lcpsubtab->lcp2file->largelcpvalues.allocatedLargelcpvalue
        = lcpsubtab->tableoflcpvalues.numoflargelcpvalues;
    }
  }

  while (idx < width)
  {
    lcpvalue = gt_lcptab_getvalue(&lcpsubtab->tableoflcpvalues,0,idx);

    /* keep track of the largest value seen */
    if (lcpvalue > lcpsubtab->lcp2file->maxbranchdepth)
    {
      lcpsubtab->lcp2file->maxbranchdepth = lcpvalue;
    }

    if (lcpvalue >= (GtUword) LCPOVERFLOW)
    {
      /* large value: append a (position, value) pair and mark the slot */
      gt_assert(lcpsubtab->lcp2file->largelcpvalues.nextfreeLargelcpvalue <
                lcpsubtab->lcp2file->largelcpvalues.allocatedLargelcpvalue);
      largelcpvalueptr
        = lcpsubtab->lcp2file->largelcpvalues.spaceLargelcpvalue +
          lcpsubtab->lcp2file->largelcpvalues.nextfreeLargelcpvalue;
      lcpsubtab->lcp2file->largelcpvalues.nextfreeLargelcpvalue++;
      largelcpvalueptr->position = posoffset + idx;
      largelcpvalueptr->value = lcpvalue;
      lcpsubtab->lcp2file->smalllcpvalues[idx] = LCPOVERFLOW;
    } else
    {
      /* small value: stored directly */
      lcpsubtab->lcp2file->smalllcpvalues[idx] = (uint8_t) lcpvalue;
    }

    lcpsubtab->lcptabsum += (double) lcpvalue;
    if (lcpsubtab->distlcpvalues != NULL)
    {
      gt_disc_distri_add(lcpsubtab->distlcpvalues, lcpvalue);
    }
    idx++;
  }

  outsmalllcpvalues(lcpsubtab->lcp2file,width);

  /* write the large values collected above, if there are any */
  if (lcpsubtab->lcp2file->largelcpvalues.nextfreeLargelcpvalue > 0)
  {
    lcpsubtab->lcp2file->totalnumoflargelcpvalues
      += lcpsubtab->lcp2file->largelcpvalues.nextfreeLargelcpvalue;
    gt_assert(lcpsubtab->lcp2file->outfpllvtab != NULL);
    gt_xfwrite(lcpsubtab->lcp2file->largelcpvalues.spaceLargelcpvalue,
               sizeof (*lcpsubtab->lcp2file->largelcpvalues.
                       spaceLargelcpvalue),
               (size_t) lcpsubtab->lcp2file->largelcpvalues.
                        nextfreeLargelcpvalue,
               lcpsubtab->lcp2file->outfpllvtab);
  }
}
/*
 * Adds <data> to stat->refseqcoveragedistribution, but only if
 * stat->refseqcovdistri is set; otherwise nothing happens.
 * <stat> must not be NULL (asserted).
 */
void gth_stat_add_to_refseqcovdistri(GthStat *stat, unsigned long data)
{
  gt_assert(stat);
  if (!stat->refseqcovdistri)
    return;
  gt_disc_distri_add(stat->refseqcoveragedistribution, data);
}
/*
 * Adds <data> to stat->matchnumdistribution, but only if
 * stat->matchnumdistri is set; otherwise nothing happens.
 * <stat> must not be NULL (asserted).
 */
void gth_stat_add_to_matchnumdistri(GthStat *stat, unsigned long data)
{
  gt_assert(stat);
  if (!stat->matchnumdistri)
    return;
  gt_disc_distri_add(stat->matchnumdistribution, data);
}
/*
 * Adds <data> to stat->sa_coverage_distribution, but only if
 * stat->sa_stats is set; otherwise nothing happens.
 * <stat> must not be NULL (asserted).
 */
void gth_stat_add_to_sa_coverage_distri(GthStat *stat, GtUword data)
{
  gt_assert(stat);
  if (!stat->sa_stats)
    return;
  gt_disc_distri_add(stat->sa_coverage_distribution, data);
}
/*
 * Adds <data> to stat->matchnumdistribution, but only if
 * stat->matchnumdistri is set; otherwise nothing happens.
 * <stat> must not be NULL (asserted).
 */
void gth_stat_add_to_matchnumdistri(GthStat *stat, GtUword data)
{
  gt_assert(stat);
  if (!stat->matchnumdistri)
    return;
  gt_disc_distri_add(stat->matchnumdistribution, data);
}