/*
  Convert the matches in <storematchtab> into fragments.

  fragments         output array; at most <numofmatches> entries are written
  num_of_fragments  receives the number of fragments actually written
  storematchtab     table of <numofmatches> matches, scanned in table order
  rare              if non-zero, a match is only used while the number of
                    matches scanned so far (including itself) that share its
                    reference start position does not exceed <rare>
  fragweightfactor  factor applied to the absolute match score to obtain
                    the fragment weight

  Independently of <rare>, a match is skipped when gth_matches_are_equal()
  reports it equal to the match directly preceding it in the table.
*/
static void gthinitfragments(GtFragment *fragments,
                             GtUword *num_of_fragments,
                             GthMatch *storematchtab,
                             GtUword numofmatches,
                             GtUword rare,
                             double fragweightfactor)
{
  GthMatch *mptr;
  GtFragment *fragmentptr = fragments;
  GtDiscDistri *startpointdistri = NULL;

  /* init number of fragments */
  *num_of_fragments = 0;

  /* start positions only need to be counted when the rare filter is on */
  if (rare)
    startpointdistri = gt_disc_distri_new();

  for (mptr = storematchtab; mptr < storematchtab + numofmatches; mptr++) {
    if (rare) {
      /* every scanned match is counted, also those that get skipped below */
      gt_disc_distri_add(startpointdistri, mptr->Storepositionreference);
      if (gt_disc_distri_get(startpointdistri,
                             mptr->Storepositionreference) > rare) {
        continue; /* too many matches start at this reference position */
      }
    }

    /* the first match has no predecessor to compare against */
    if (mptr != storematchtab && gth_matches_are_equal(mptr, mptr-1))
      continue; /* same as the previous match */

    /* NOTE(review): abs() takes an int; if Storescore is wider than int the
       score is truncated before the absolute value is taken -- confirm */
    fragmentptr->weight = (GtWord) (fragweightfactor *
                                    (double) abs(mptr->Storescore));
    fragmentptr->startpos1 = mptr->Storepositionreference;
    fragmentptr->endpos1 = mptr->Storepositionreference +
                           mptr->Storelengthreference - 1;
    fragmentptr->startpos2 = mptr->Storepositiongenomic;
    fragmentptr->endpos2 = mptr->Storepositiongenomic +
                           mptr->Storelengthgenomic - 1;
    fragmentptr++;
    (*num_of_fragments)++;
  }

  /* startpointdistri is still NULL here when <rare> is 0 */
  gt_disc_distri_delete(startpointdistri);

  gt_assert(*num_of_fragments <= numofmatches);
}
/*
  Build a distribution of the <len> values of all <ldb_nelems> links stored
  in <condenseq>. The returned distribution is created here and is not
  released by this function.
*/
GtDiscDistri *gt_condenseq_link_length_dist(const GtCondenseq *condenseq)
{
  GtDiscDistri *length_dist = gt_disc_distri_new();
  GtUword link_no = 0;

  /* one entry per link, keyed by the link length */
  while (link_no < condenseq->ldb_nelems) {
    gt_disc_distri_add(length_dist, condenseq->links[link_no].len);
    link_no++;
  }
  return length_dist;
}
/*
  Allocate a GtHpolProcessor for <encseq> with the given <hmin> and set its
  fields to their initial values. <encseq> must not be NULL and <hmin> must
  be greater than 0 (both are asserted). The pointer <encseq> is stored, not
  copied. Two distributions and a DNA alphabet are created for the new
  object.
*/
GtHpolProcessor *gt_hpol_processor_new(GtEncseq *encseq, unsigned long hmin)
{
  GtHpolProcessor *hpp = gt_malloc(sizeof *hpp);

  gt_assert(encseq != NULL);
  gt_assert(hmin > 0);

  /* values supplied by the caller */
  hpp->encseq = encseq;
  hpp->hmin = hmin;

  /* numeric settings start at 0, boolean switches at false */
  hpp->read_hmin = 0;
  hpp->qmax = 0;
  hpp->mapqmin = 0;
  hpp->covmin = 0;
  hpp->allow_partial = false;
  hpp->allow_multiple = false;

  /* distributions and their associated counters */
  hpp->hdist = gt_disc_distri_new();
  hpp->nof_h = 0;
  hpp->hdist_e = gt_disc_distri_new();
  hpp->nof_h_e = 0;
  hpp->hlen_max = 0;

  /* no helper objects attached yet */
  hpp->cds_oracle = NULL;
  hpp->asp = NULL;

  /* counters */
  hpp->nof_complete_edited = 0;
  hpp->nof_complete_not_edited = 0;
  hpp->nof_skipped = 0;
  hpp->nof_unmapped = 0;
  hpp->nof_multihits = 0;
  hpp->nof_replaced = 0;

  /* remaining limits */
  hpp->clenmax = GT_UNDEF_ULONG;
  hpp->altmax = 1.0;
  hpp->refmin = 0.0;

  hpp->alpha = gt_alphabet_new_dna();
  hpp->adjust_s_hlen = false;

  /* all output is switched off and no output files are set */
  hpp->output_segments = false;
  hpp->outfp_segments = NULL;
  hpp->output_stats = false;
  hpp->output_multihit_stats = false;
  hpp->outfp_stats = NULL;
  hpp->processed_segments = NULL;
  hpp->reads_iters = NULL;
  hpp->outfiles = NULL;
  hpp->nfiles = 0;

  return hpp;
}
/*
  Collect the lengths of all sequences in <bs> in a temporary distribution
  and print it to <outfp>, preceded by the heading line
  "sequence length distribution:". <bs> must not be NULL (asserted).
*/
void gt_bioseq_show_seqlengthdistri(GtBioseq *bs, GtFile *outfp)
{
  GtDiscDistri *length_distri;
  GtUword seqnum = 0;

  gt_assert(bs);
  length_distri = gt_disc_distri_new();

  /* one entry per sequence, keyed by its length */
  while (seqnum < gt_bioseq_number_of_sequences(bs)) {
    gt_disc_distri_add(length_distri,
                       gt_bioseq_get_sequence_length(bs, seqnum));
    seqnum++;
  }

  gt_file_xprintf(outfp, "sequence length distribution:\n");
  gt_disc_distri_show(length_distri, outfp);

  /* the distribution was only needed for printing */
  gt_disc_distri_delete(length_distri);
}
GtNodeVisitor* gt_stat_visitor_new(bool gene_length_distri, bool gene_score_distri, bool exon_length_distri, bool exon_number_distri, bool intron_length_distri, bool cds_length_distri, bool used_sources) { GtNodeVisitor *nv = gt_node_visitor_create(gt_stat_visitor_class()); GtStatVisitor *sv = stat_visitor_cast(nv); if (gene_length_distri) sv->gene_length_distribution = gt_disc_distri_new(); if (gene_score_distri) sv->gene_score_distribution = gt_disc_distri_new(); if (exon_length_distri) sv->exon_length_distribution = gt_disc_distri_new(); if (exon_number_distri) sv->exon_number_distribution = gt_disc_distri_new(); if (intron_length_distri) sv->intron_length_distribution = gt_disc_distri_new(); if (cds_length_distri) sv->cds_length_distribution = gt_disc_distri_new(); if (used_sources) sv->used_sources = gt_cstr_table_new(); return nv; }
GtDiscDistri *gt_condenseq_link_comp_dist(const GtCondenseq *condenseq) { GtUword idx; GtDiscDistri *res = gt_disc_distri_new(); for (idx = 0; idx < condenseq->ldb_nelems; idx++) { GtEditscript *es = condenseq->links[idx].editscript; GtUword vlen; size_t size; vlen = gt_editscript_get_target_len(es); size = gt_editscript_size(es); gt_disc_distri_add(res, (GtUword) ((double) size/(double) vlen * 100)); } return res; }
/*
  Read a distribution from <fp>: first the number of entries, then for each
  entry a symbol followed by its frequency. Returns the newly created
  distribution, or NULL if any read failed (the partially filled distribution
  is deleted in that case).

  The value passed in <dist> is ignored and overwritten with a new
  distribution. NOTE(review): callers presumably pass NULL or an unowned
  pointer here -- confirm, otherwise the old object is lost.
*/
static inline GtDiscDistri *read_zero_disc_distri(GtDiscDistri *dist,
                                                  FILE *fp,
                                                  GtError *err)
{
  int had_err;
  GtUword entry, symbol, num_of_zero_leaves;
  GtUint64 freq;

  dist = gt_disc_distri_new();

  had_err = gt_io_error_fread_one(num_of_zero_leaves, fp, err);
  if (!had_err) {
    /* the count is only valid (and only used) if reading it succeeded */
    for (entry = 0; entry < num_of_zero_leaves; entry++) {
      had_err = gt_io_error_fread_one(symbol, fp, err);
      if (had_err)
        break;
      had_err = gt_io_error_fread_one(freq, fp, err);
      if (had_err)
        break;
      gt_disc_distri_add_multi(dist, symbol, freq);
    }
  }

  if (had_err) {
    gt_disc_distri_delete(dist);
    return NULL;
  }
  return dist;
}
/*
  Unit test for the GtDiscDistri add/add_multi/get calls.

  A new distribution is checked to report 0 for the keys 0 and 100. If those
  checks left had_err at 0, key 0 is added once and key 100 is added with a
  count of 256; afterwards the distribution is checked to report 1 for key 0
  and 256 for key 100. The distribution is deleted before returning.

  Returns had_err, which starts at 0 and is handed to every ensure() check.
  NOTE(review): ensure() is defined elsewhere; presumably it sets had_err and
  <err> when its condition is false -- confirm against its definition.
*/
int gt_disc_distri_unit_test(GtError *err) { GtDiscDistri *d; int had_err = 0; gt_error_check(err); d = gt_disc_distri_new(); ensure(had_err, gt_disc_distri_get(d, 0) == 0); ensure(had_err, gt_disc_distri_get(d, 100) == 0); if (!had_err) { gt_disc_distri_add(d, 0); gt_disc_distri_add_multi(d, 100, 256); } ensure(had_err, gt_disc_distri_get(d, 0) == 1); ensure(had_err, gt_disc_distri_get(d, 100) == 256); gt_disc_distri_delete(d); return had_err; }
GthStat *gth_stat_new(void) { GthStat *stat; stat = gt_malloc(sizeof (GthStat)); stat->exondistri = GTH_DEFAULT_EXONDISTRI; stat->introndistri = GTH_DEFAULT_INTRONDISTRI; stat->matchnumdistri = GTH_DEFAULT_MATCHNUMDISTRI; stat->refseqcovdistri = GTH_DEFAULT_REFSEQCOVDISTRI; stat->sa_stats = false; stat->gthfilestat_mode = false; stat->numofchains = 0; stat->numofremovedzerobaseexons = 0; stat->numofautointroncutoutcalls = 0; stat->numofunsuccessfulintroncutoutDPs = 0; stat->numoffailedDPparameterallocations = 0; stat->numoffailedmatrixallocations = 0; stat->numofundeterminedSAs = 0; stat->numoffilteredpolyAtailmatches = 0; /* init variables for memory statistics */ stat->numofSAs = 0; stat->numofPGLs_stored = 0; stat->totalsizeofbacktracematricesinMB = 0; stat->numofbacktracematrixallocations = 0; /* init distributions */ stat->exondistribution = gt_disc_distri_new(); stat->introndistribution = gt_disc_distri_new(); stat->matchnumdistribution = gt_disc_distri_new(); stat->refseqcoveragedistribution = gt_disc_distri_new(); stat->sa_alignment_score_distribution = gt_disc_distri_new(); stat->sa_coverage_distribution = gt_disc_distri_new(); return stat; }
/*
  Create a new GtOutlcpinfo object.

  indexname                  if not NULL, two output files are opened for
                             writing via gt_fa_fopen_with_suffix(), using the
                             suffixes GT_LCPTABSUFFIX and GT_LARGELCPTABSUFFIX,
                             and the lcp2file sub-structure is set up; if
                             NULL, no files are used
  numofchars, prefixlength   if <prefixlength> is greater than 0, a turning
                             wheel is created from these two values
  withdistribution           if true, a distribution is created and stored
                             in lcpsubtab.distlcpvalues
  swallow_tail_lcpvalues     stored unchanged in the new object
  final_process_bucket,      only used if <indexname> is NULL and
  final_process_bucket_info  <final_process_bucket> is not NULL: both are
                             stored in a newly allocated lcpprocess structure
  err                        passed on to gt_fa_fopen_with_suffix()

  Returns the new object, or NULL if one of the two files could not be
  opened. In the failure case everything created by this function is
  released again.
*/
GtOutlcpinfo *gt_Outlcpinfo_new(const char *indexname,
                                unsigned int numofchars,
                                unsigned int prefixlength,
                                bool withdistribution,
                                bool swallow_tail_lcpvalues,
                                GtFinalProcessBucket final_process_bucket,
                                void *final_process_bucket_info,
                                GtError *err)
{
  bool haserr = false;
  GtOutlcpinfo *outlcpinfo;

  outlcpinfo = gt_malloc(sizeof (*outlcpinfo));
  outlcpinfo->sizeofinfo = sizeof (*outlcpinfo);
  outlcpinfo->lcpsubtab.lcptabsum = 0.0;
  outlcpinfo->swallow_tail_lcpvalues = swallow_tail_lcpvalues;
  if (withdistribution)
  {
    outlcpinfo->lcpsubtab.distlcpvalues = gt_disc_distri_new();
  } else
  {
    outlcpinfo->lcpsubtab.distlcpvalues = NULL;
  }
  if (indexname == NULL)
  {
    /* no index files: optionally hand the buckets to a callback instead */
    outlcpinfo->lcpsubtab.lcp2file = NULL;
    if (final_process_bucket != NULL)
    {
      outlcpinfo->lcpsubtab.lcpprocess
        = gt_malloc(sizeof (*outlcpinfo->lcpsubtab.lcpprocess));
      outlcpinfo->lcpsubtab.lcpprocess->final_process_bucket
        = final_process_bucket;
      outlcpinfo->lcpsubtab.lcpprocess->final_process_bucket_info
        = final_process_bucket_info;
    } else
    {
      outlcpinfo->lcpsubtab.lcpprocess = NULL;
    }
  } else
  {
    outlcpinfo->lcpsubtab.lcpprocess = NULL;
    outlcpinfo->lcpsubtab.lcp2file
      = gt_malloc(sizeof (*outlcpinfo->lcpsubtab.lcp2file));
    outlcpinfo->sizeofinfo += sizeof (*outlcpinfo->lcpsubtab.lcp2file);
    outlcpinfo->lcpsubtab.lcp2file->countoutputlcpvalues = 0;
    outlcpinfo->lcpsubtab.lcp2file->maxbranchdepth = 0;
    outlcpinfo->lcpsubtab.lcp2file->totalnumoflargelcpvalues = 0;
    outlcpinfo->lcpsubtab.lcp2file->reservoir = NULL;
    outlcpinfo->lcpsubtab.lcp2file->sizereservoir = 0;
    outlcpinfo->lcpsubtab.lcp2file->smalllcpvalues = NULL;
    GT_INITARRAY(&outlcpinfo->lcpsubtab.lcp2file->largelcpvalues,
                 Largelcpvalue);
    /* give the second stream a defined value even if the first open fails */
    outlcpinfo->lcpsubtab.lcp2file->outfpllvtab = NULL;
    outlcpinfo->lcpsubtab.lcp2file->outfplcptab
      = gt_fa_fopen_with_suffix(indexname,GT_LCPTABSUFFIX,"wb",err);
    if (outlcpinfo->lcpsubtab.lcp2file->outfplcptab == NULL)
    {
      haserr = true;
    }
    if (!haserr)
    {
      outlcpinfo->lcpsubtab.lcp2file->outfpllvtab
        = gt_fa_fopen_with_suffix(indexname,GT_LARGELCPTABSUFFIX,"wb",err);
      if (outlcpinfo->lcpsubtab.lcp2file->outfpllvtab == NULL)
      {
        haserr = true;
      }
    }
  }
  outlcpinfo->numsuffixes2output = 0;
  outlcpinfo->minchanged = 0;
  if (!haserr && prefixlength > 0)
  {
    outlcpinfo->turnwheel = gt_turningwheel_new(prefixlength,numofchars);
    outlcpinfo->sizeofinfo += gt_turningwheel_size();
  } else
  {
    outlcpinfo->turnwheel = NULL;
  }
#ifdef SKDEBUG
  outlcpinfo->previoussuffix.startpos = 0;
#endif
  outlcpinfo->previoussuffix.code = 0;
  outlcpinfo->previoussuffix.prefixindex = 0;
  outlcpinfo->previoussuffix.defined = false;
  outlcpinfo->previousbucketwasempty = false;
  outlcpinfo->lcpsubtab.tableoflcpvalues.bucketoflcpvalues = NULL;
  outlcpinfo->lcpsubtab.tableoflcpvalues.numofentries = 0;
#ifndef NDEBUG
  outlcpinfo->lcpsubtab.tableoflcpvalues.isset = NULL;
#endif
  if (haserr)
  {
    /* haserr is only ever set in the branch that allocated lcp2file; there
       lcpprocess and turnwheel are NULL and outfpllvtab was not opened, so
       the first stream, lcp2file and the distribution are all that is left
       to release besides the object itself */
    if (outlcpinfo->lcpsubtab.lcp2file != NULL)
    {
      if (outlcpinfo->lcpsubtab.lcp2file->outfplcptab != NULL)
      {
        gt_fa_fclose(outlcpinfo->lcpsubtab.lcp2file->outfplcptab);
      }
      gt_free(outlcpinfo->lcpsubtab.lcp2file);
    }
    if (outlcpinfo->lcpsubtab.distlcpvalues != NULL)
    {
      gt_disc_distri_delete(outlcpinfo->lcpsubtab.distlcpvalues);
    }
    gt_free(outlcpinfo);
    return NULL;
  }
  return outlcpinfo;
}