/*
  Open the index <indexname> as a sequential suffix array reader (requesting
  the LCP, SUF, ESQ and SSP tables) and hand it to gt_enumeratemaxpairs()
  together with <userdefinedleastlength>, the callback <processmaxpairs> and
  its opaque argument <processmaxpairsinfo>.

  <scanfile> selects the access mode: SEQ_scan if true, SEQ_mappedboth
  otherwise.

  Returns 0 on success and -1 if the reader could not be created or if
  gt_enumeratemaxpairs() reported a non-zero result. <err> is passed to both
  callees, which are expected to set it on failure.
*/
static int callenummaxpairs(const char *indexname,
                            unsigned int userdefinedleastlength,
                            bool scanfile,
                            Processmaxpairs processmaxpairs,
                            void *processmaxpairsinfo,
                            GtLogger *logger,
                            GtError *err)
{
  Sequentialsuffixarrayreader *reader;
  int retval = 0;

  gt_error_check(err);
  reader = gt_newSequentialsuffixarrayreaderfromfile(
                         indexname,
                         SARR_LCPTAB | SARR_SUFTAB | SARR_ESQTAB | SARR_SSPTAB,
                         scanfile ? SEQ_scan : SEQ_mappedboth,
                         logger,
                         err);
  if (reader == NULL)
  {
    /* nothing was acquired, so there is nothing to release */
    return -1;
  }
  if (gt_enumeratemaxpairs(reader,
                           gt_encseqSequentialsuffixarrayreader(reader),
                           gt_readmodeSequentialsuffixarrayreader(reader),
                           userdefinedleastlength,
                           processmaxpairs,
                           processmaxpairsinfo,
                           err) != 0)
  {
    retval = -1;
  }
  /* the reader is released on success and on enumeration failure alike */
  gt_freeSequentialsuffixarrayreader(&reader);
  return retval;
}
/*
  Run a depth-first traversal (gt_depthfirstesa) over the enhanced suffix
  array read through <ssar>, using the occ_* callbacks.

  A temporary OccDfsstate is filled with the encoded sequence, read mode and
  total length obtained from <ssar>, the range <minmersize>..<maxmersize> and
  the three distribution arrays. The arrays are only stored in the state
  here; presumably the callbacks update them (verify against the occ_*
  implementations).

  Returns 0 if the traversal returned 0 and -1 otherwise. The state is
  released in both cases.
*/
static int computeoccurrenceratio(Sequentialsuffixarrayreader *ssar,
                                  unsigned long minmersize,
                                  unsigned long maxmersize,
                                  GtArrayuint64_t *uniquedistribution,
                                  GtArrayuint64_t *nonuniquedistribution,
                                  GtArrayuint64_t *nonuniquemultidistribution,
                                  GtLogger *logger,
                                  GtError *err)
{
  OccDfsstate *dfsstate;
  bool failed;

  gt_error_check(err);
  dfsstate = gt_malloc(sizeof (*dfsstate));

  /* sequence related information taken from the reader */
  dfsstate->encseq = gt_encseqSequentialsuffixarrayreader(ssar);
  dfsstate->readmode = gt_readmodeSequentialsuffixarrayreader(ssar);
  dfsstate->totallength = gt_encseq_total_length(dfsstate->encseq);

  /* parameters and output arrays supplied by the caller */
  dfsstate->minmersize = minmersize;
  dfsstate->maxmersize = maxmersize;
  dfsstate->uniquedistribution = uniquedistribution;
  dfsstate->nonuniquedistribution = nonuniquedistribution;
  dfsstate->nonuniquemultidistribution = nonuniquemultidistribution;

  failed = gt_depthfirstesa(ssar,
                            occ_allocateDfsinfo,
                            occ_freeDfsinfo,
                            occ_processleafedge,
                            NULL,
                            occ_processcompletenode,
                            occ_assignleftmostleaf,
                            occ_assignrightmostleaf,
                            (Dfsstate*) dfsstate,
                            logger,
                            err) != 0;
  gt_free(dfsstate);
  if (failed)
  {
    return -1;
  }
  return 0;
}
static int enumeratelcpintervals(const char *inputindex, Sequentialsuffixarrayreader *ssar, const char *storeindex, bool storecounts, GtUword mersize, GtUword minocc, GtUword maxocc, bool performtest, GtLogger *logger, GtError *err) { TyrDfsstate *state; bool haserr = false; unsigned int alphasize; gt_error_check(err); state = gt_malloc(sizeof (*state)); GT_INITARRAY(&state->occdistribution,Countwithpositions); state->esrspace = gt_encseq_create_reader_with_readmode( gt_encseqSequentialsuffixarrayreader(ssar), gt_readmodeSequentialsuffixarrayreader(ssar), 0); state->mersize = (GtUword) mersize; state->encseq = gt_encseqSequentialsuffixarrayreader(ssar); alphasize = gt_alphabet_num_of_chars(gt_encseq_alphabet(state->encseq)); state->readmode = gt_readmodeSequentialsuffixarrayreader(ssar); state->storecounts = storecounts; state->minocc = minocc; state->maxocc = maxocc; state->totallength = gt_encseq_total_length(state->encseq); state->performtest = performtest; state->countoutputmers = 0; state->merindexfpout = NULL; state->countsfilefpout = NULL; GT_INITARRAY(&state->largecounts,Largecount); if (strlen(storeindex) == 0) { state->sizeofbuffer = 0; state->bytebuffer = NULL; } else { state->sizeofbuffer = MERBYTES(mersize); state->bytebuffer = gt_malloc(sizeof *state->bytebuffer * state->sizeofbuffer); } if (performtest) { state->currentmer = gt_malloc(sizeof *state->currentmer * state->mersize); state->suftab = gt_suftabSequentialsuffixarrayreader(ssar); } else { state->currentmer = NULL; state->suftab = NULL; } if (state->mersize > state->totallength) { gt_error_set(err,"mersize "GT_WU" > "GT_WU" = totallength not allowed", state->mersize, state->totallength); haserr = true; } else { if (strlen(storeindex) == 0) { state->processoccurrencecount = adddistpos2distribution; } else { state->merindexfpout = gt_fa_fopen_with_suffix(storeindex,MERSUFFIX, "wb",err); if (state->merindexfpout == NULL) { haserr = true; } else { if (state->storecounts) { state->countsfilefpout = 
gt_fa_fopen_with_suffix(storeindex,COUNTSSUFFIX,"wb",err); if (state->countsfilefpout == NULL) { haserr = true; } } } state->processoccurrencecount = outputsortedstring2index; } if (!haserr) { if (gt_depthfirstesa(ssar, tyr_allocateDfsinfo, tyr_freeDfsinfo, tyr_processleafedge, NULL, tyr_processcompletenode, tyr_assignleftmostleaf, tyr_assignrightmostleaf, (Dfsstate*) state, logger, err) != 0) { haserr = true; } if (strlen(storeindex) == 0) { showfinalstatistics(state,inputindex,logger); } } if (!haserr) { if (state->countsfilefpout != NULL) { gt_logger_log(logger,"write "GT_WU" mercounts > "GT_WU " to file \"%s%s\"", state->largecounts.nextfreeLargecount, (GtUword) MAXSMALLMERCOUNT, storeindex, COUNTSSUFFIX); gt_xfwrite(state->largecounts.spaceLargecount, sizeof (Largecount), (size_t) state->largecounts.nextfreeLargecount, state->countsfilefpout); } } if (!haserr) { gt_logger_log(logger,"number of "GT_WU"-mers in index: "GT_WU"", mersize, state->countoutputmers); gt_logger_log(logger,"index size: %.2f megabytes\n", GT_MEGABYTES(state->countoutputmers * state->sizeofbuffer + sizeof (GtUword) * EXTRAINTEGERS)); } } /* now out EXTRAINTEGERS integer values */ if (!haserr && state->merindexfpout != NULL) { outputbytewiseUlongvalue(state->merindexfpout, (GtUword) state->mersize); outputbytewiseUlongvalue(state->merindexfpout,(GtUword) alphasize); } gt_fa_xfclose(state->merindexfpout); gt_fa_xfclose(state->countsfilefpout); GT_FREEARRAY(&state->occdistribution,Countwithpositions); gt_free(state->currentmer); gt_free(state->bytebuffer); GT_FREEARRAY(&state->largecounts,Largecount); gt_encseq_reader_delete(state->esrspace); gt_free(state); return haserr ? -1 : 0; }