/*
 * Tool entry point: loads the encoded index sequence named by
 * argv[parsedArgs] (the index reported by parseChkIndexOptions()) and runs
 * EISVerifyIntegrity() on it.
 *
 * argc, argv: command line, handed to parseChkIndexOptions().
 * err:        error object; set when loading the index or the integrity
 *             check fails.
 *
 * Returns 0 on success or when the option parser requests an exit, and -1
 * on an option error, a load failure or a failed integrity check.
 */
extern int gt_packedindex_chk_integrity(int argc, const char *argv[],
                                        GtError *err)
{
  struct encIdxSeq *seq;
  struct chkIndexOptions params;
  GtStr *inputProject;
  int parsedArgs;
  int had_err = 0;
  Verboseinfo *verbosity = NULL;
  gt_error_check(err);

  /* nothing has been allocated yet, so returning directly is safe here */
  switch (parseChkIndexOptions(&parsedArgs, argc, argv, &params, err))
  {
    case OPTIONPARSER_OK:
      break;
    case OPTIONPARSER_ERROR:
      return -1;
    case OPTIONPARSER_REQUESTS_EXIT:
      return 0;
  }

  inputProject = gt_str_new_cstr(argv[parsedArgs]);
  verbosity = newverboseinfo(params.verboseOutput);

  seq = loadEncIdxSeq(inputProject, params.encType, params.EISFeatureSet,
                      verbosity, err);
  if ((had_err = seq == NULL))
  {
    gt_error_set(err, "Failed to load index: %s", gt_str_get(inputProject));
  }
  else
  {
    fprintf(stderr, "# Using index over sequence "FormatSeqpos
            " symbols long.\n", EISLength(seq));
    {
      int corrupt
        = EISVerifyIntegrity(seq, inputProject, params.skipCount,
                             params.progressInterval, stderr,
                             params.checkFlags, verbosity, err);
      if ((had_err = corrupt != 0))
      {
        /* print the detailed message first: it is replaced right below by
           the summary taken from the result-string table */
        fputs(gt_error_get(err), stderr);
        fputs("\n", stderr);
        gt_error_set(err, "Integrity check failed for index: %s",
                     EISIntegrityCheckResultStrings[corrupt]);
      }
    }
  }

  if (seq)
    deleteEncIdxSeq(seq);
  if (inputProject)
    gt_str_delete(inputProject);
  if (verbosity)
    freeverboseinfo(&verbosity);
  return had_err?-1:0;
}
/*
 * Runner for the tyr occratio tool. Fills three uint64_t distribution
 * arrays through tyr_occratio() and, if that call succeeds, prints them
 * with showoccratios().
 *
 * tool_arguments: points to the Tyr_occratio_options of this invocation.
 * err:            error object handed to tyr_occratio().
 * argc, argv and parsed_args are not used.
 *
 * Returns 0 on success and -1 if tyr_occratio() reported a failure.
 */
static int gt_tyr_occratio_runner(GT_UNUSED int argc,
                                  GT_UNUSED const char **argv,
                                  GT_UNUSED int parsed_args,
                                  void *tool_arguments,
                                  GtError *err)
{
  Tyr_occratio_options *opts = tool_arguments;
  Verboseinfo *vinfo = newverboseinfo(opts->verbose);
  GtArrayuint64_t uniq, nonuniq, nonuniqmulti;
  int status;

  GT_INITARRAY(&uniq,uint64_t);
  GT_INITARRAY(&nonuniq,uint64_t);
  GT_INITARRAY(&nonuniqmulti,uint64_t);

  if (tyr_occratio(opts->str_inputindex,
                   opts->scanfile,
                   opts->minmersize,
                   opts->maxmersize,
                   &uniq,
                   &nonuniq,
                   &nonuniqmulti,
                   vinfo,
                   err) != 0)
  {
    status = -1;
  }
  else
  {
    /* the distributions are only reported when they were computed */
    status = 0;
    showoccratios(&uniq,
                  &nonuniq,
                  &nonuniqmulti,
                  opts->outputmode,
                  opts->outputvector);
  }

  freeverboseinfo(&vinfo);
  GT_FREEARRAY(&uniq,uint64_t);
  GT_FREEARRAY(&nonuniq,uint64_t);
  GT_FREEARRAY(&nonuniqmulti,uint64_t);
  return status;
}
/*
 * Runner for the spaced seed tool. In verbose mode it first echoes the
 * index name and every query file name to stdout, then delegates the work
 * to matchspacedseed().
 *
 * argc, parsed_args: only compared against each other in an assertion.
 * tool_arguments:    points to the Cge_spacedseed_options of this call.
 * err:               error object handed to matchspacedseed().
 *
 * Returns 0 on success and -1 if matchspacedseed() reported a failure.
 */
static int gt_cge_spacedseed_runner(GT_UNUSED int argc,
                                    GT_UNUSED const char **argv,
                                    GT_UNUSED int parsed_args,
                                    void *tool_arguments,
                                    GtError *err)
{
  Cge_spacedseed_options *opts = tool_arguments;
  Verboseinfo *vinfo;
  int status = 0;

  gt_assert(parsed_args == argc);
  vinfo = newverboseinfo(opts->verbose);

  if (opts->verbose)
  {
    const char *indexkind = opts->withesa ? "esa" : "pck";
    unsigned long qnum = 0;

    printf("# %sindex=%s\n", indexkind, gt_str_get(opts->str_inputindex));
    while (qnum < gt_str_array_size(opts->queryfilenames))
    {
      printf("# queryfile=%s\n",
             gt_str_array_get(opts->queryfilenames,qnum));
      qnum++;
    }
  }

  if (matchspacedseed(opts->withesa,
                      opts->docompare,
                      opts->str_inputindex,
                      opts->queryfilenames,
                      opts->verbose,
                      err) != 0)
  {
    status = -1;
  }

  freeverboseinfo(&vinfo);
  return status;
}
/*
 * Tool entry point: verifies a BWT sequence index and then cross-checks
 * pattern searches on it against searches on the mapped suffix array of the
 * same project.
 *
 * After option parsing the index is obtained with availBWTSeq() and checked
 * with BWTSeqVerifyIntegrity(). Then params.numOfSamples patterns are drawn
 * from the pattern iterator; each one is searched with an MMsearchiterator
 * on the suffix array and on the index. If the index has locate information
 * the match positions are compared one by one, otherwise only the match
 * counts are compared.
 *
 * argc, argv: command line, handed to parseChkBWTOptions().
 * err:        error object; set on most failure paths.
 *
 * Returns 0 on success or when the option parser requests an exit, -1 on
 * any error. All resources acquired so far are released on every path that
 * returns; the only exception is the abort() taken when the match iterator
 * cannot be reinitialized.
 */
extern int gt_packedindex_chk_search(int argc, const char *argv[],
                                     GtError *err)
{
  struct chkSearchOptions params;
  Suffixarray suffixarray;
  Enumpatterniterator *epi = NULL;
  bool saIsLoaded = false;
  BWTSeq *bwtSeq = NULL;
  GtStr *inputProject = NULL;
  int parsedArgs;
  bool had_err = false;
  BWTSeqExactMatchesIterator EMIter;
  bool EMIterInitialized = false;
  Verboseinfo *verbosity = NULL;
  inputProject = gt_str_new();

  /* single-pass loop: 'break' jumps to the common cleanup code below */
  do {
    gt_error_check(err);
    {
      bool exitNow = false;
      switch (parseChkBWTOptions(&parsedArgs, argc, argv, &params,
                                 inputProject, err))
      {
        case OPTIONPARSER_OK:
          break;
        case OPTIONPARSER_ERROR:
          had_err = true;
          exitNow = true;
          break;
        case OPTIONPARSER_REQUESTS_EXIT:
          exitNow = true;
          break;
      }
      if (exitNow)
        break;
    }
    gt_str_set(inputProject, argv[parsedArgs]);

    verbosity = newverboseinfo(params.verboseOutput);

    bwtSeq = availBWTSeq(&params.idx.final, verbosity, err);
    if ((had_err = bwtSeq == NULL))
      break;

    {
      enum verifyBWTSeqErrCode retval =
        BWTSeqVerifyIntegrity(bwtSeq, inputProject, params.flags,
                              params.progressInterval, stderr, verbosity,
                              err);
      if ((had_err = (retval != VERIFY_BWTSEQ_NO_ERROR)))
      {
        fprintf(stderr, "index integrity check failed: %s\n",
                gt_error_get(err));
        gt_error_set(err, "aborted because of index integrity check fail");
        break;
      }
    }
    if (BWTSeqHasLocateInformation(bwtSeq))
    {
      if ((had_err = !initEmptyEMIterator(&EMIter, bwtSeq)))
      {
        gt_error_set(err, "Cannot create matches iterator for sequence index.");
        break;
      }
      EMIterInitialized = true;
    }
    {
      Seqpos totalLen, dbstart;
      unsigned long trial, patternLen;

      if ((had_err =
           mapsuffixarray(&suffixarray, SARR_SUFTAB | SARR_ESQTAB,
                          inputProject, NULL, err) != 0))
      {
        gt_error_set(err, "Can't load suffix array project with"
                     " demand for encoded sequence and suffix table files\n");
        break;
      }
      totalLen = getencseqtotallength(suffixarray.encseq);
      saIsLoaded = true;
      if ((had_err = (params.minPatLen >= 0L && params.maxPatLen >= 0L
                      && params.minPatLen > params.maxPatLen)))
      {
        gt_error_set(err, "Invalid pattern lengths selected: min=%ld, max=%ld;"
                     " min <= max is required.", params.minPatLen,
                     params.maxPatLen);
        break;
      }
      /* a negative length means "not given": derive it from the
         recommended prefix length */
      if (params.minPatLen < 0 || params.maxPatLen < 0)
      {
        unsigned int numofchars
          = getencseqAlphabetnumofchars(suffixarray.encseq);
        if (params.minPatLen < 0)
          params.minPatLen = recommendedprefixlength(numofchars, totalLen);
        if (params.maxPatLen < 0)
          params.maxPatLen =
            MAX(params.minPatLen,
                125 * recommendedprefixlength(numofchars, totalLen) / 100);
        else
          params.maxPatLen = MAX(params.maxPatLen, params.minPatLen);
      }
      fprintf(stderr, "Using patterns of lengths %lu to %lu\n",
              params.minPatLen, params.maxPatLen);
      if ((had_err = totalLen + 1 != BWTSeqLength(bwtSeq)))
      {
        gt_error_set(err, "base suffix array and index have diferrent lengths!"
                     FormatSeqpos" vs. "FormatSeqpos, totalLen + 1,
                     BWTSeqLength(bwtSeq));
        break;
      }
      if ((had_err =
           (epi = newenumpatterniterator(params.minPatLen, params.maxPatLen,
                                         suffixarray.encseq, err)) == NULL))
      {
        fputs("Creation of pattern iterator failed!\n", stderr);
        break;
      }
      for (trial = 0; !had_err && trial < params.numOfSamples; ++trial)
      {
        const GtUchar *pptr = nextEnumpatterniterator(&patternLen, epi);
        MMsearchiterator *mmsi =
          newmmsearchiterator(suffixarray.encseq,
                              suffixarray.suftab,
                              0,  /* leftbound */
                              totalLen, /* rightbound */
                              0, /* offset */
                              suffixarray.readmode,
                              pptr,
                              patternLen);
        if (BWTSeqHasLocateInformation(bwtSeq))
        {
          Seqpos numMatches;
          if ((had_err = !reinitEMIterator(&EMIter, bwtSeq, pptr, patternLen,
                                           false)))
          {
            fputs("Internal error: failed to reinitialize pattern match"
                  " iterator", stderr);
            abort();
          }
          numMatches = EMINumMatchesTotal(&EMIter);
          gt_assert(numMatches == BWTSeqMatchCount(bwtSeq, pptr, patternLen,
                                                   false));
          gt_assert(EMINumMatchesTotal(&EMIter)
                    == countmmsearchiterator(mmsi));
          while (nextmmsearchiterator(&dbstart,mmsi))
          {
            Seqpos matchPos = 0;
            bool match = EMIGetNextMatch(&EMIter, &matchPos, bwtSeq);
            if ((had_err = !match))
            {
              gt_error_set(err,
                           "matches of packedindex expired before mmsearch!");
              break;
            }
            if ((had_err = matchPos != dbstart))
            {
              gt_error_set(err, "packedindex match doesn't equal mmsearch "
                           "match result!\n"FormatSeqpos" vs. "FormatSeqpos"\n",
                           matchPos, dbstart);
              /* stop here: the next iteration would overwrite had_err and
                 could clear the mismatch that was just detected */
              break;
            }
          }
          if (!had_err)
          {
            Seqpos matchPos;
            bool trailingMatch = EMIGetNextMatch(&EMIter, &matchPos, bwtSeq);
            if ((had_err = trailingMatch))
            {
              gt_error_set(err, "matches of mmsearch expired before fmindex!");
              /* leaving the for loop early skips the regular release
                 below, so release the search iterator here */
              freemmsearchiterator(&mmsi);
              break;
            }
          }
        }
        else
        {
          Seqpos numFMIMatches = BWTSeqMatchCount(bwtSeq, pptr, patternLen,
                                                  false),
            numMMSearchMatches = countmmsearchiterator(mmsi);
          if ((had_err = numFMIMatches != numMMSearchMatches))
          {
            /* argument order follows the labels: suffix array count
               (mmsearch) first, fmindex count second */
            gt_error_set(err, "Number of matches not equal for suffix array ("
                         FormatSeqpos") and fmindex ("FormatSeqpos").\n",
                         numMMSearchMatches, numFMIMatches);
          }
        }
        freemmsearchiterator(&mmsi);
        if (params.progressInterval && !((trial + 1) % params.progressInterval))
          putc('.', stderr);
      }
      if (params.progressInterval)
        putc('\n', stderr);
      fprintf(stderr, "Finished %lu of %lu matchings successfully.\n",
              trial, params.numOfSamples);
    }
  } while (0);

  if (EMIterInitialized)
    destructEMIterator(&EMIter);
  if (saIsLoaded)
    freesuffixarray(&suffixarray);
  if (epi)
    freeEnumpatterniterator(&epi);
  if (bwtSeq)
    deleteBWTSeq(bwtSeq);
  if (verbosity)
    freeverboseinfo(&verbosity);
  if (inputProject)
    gt_str_delete(inputProject);
  return had_err?-1:0;
}
/*
 * Runner for the tyr mkindex tool. In verbose mode it echoes the chosen
 * options to stdout, then calls merstatistics(). If that succeeds, an index
 * is to be stored and the prefix length setting is not
 * Undeterminedprefixlength, it also calls constructmerbuckets().
 *
 * tool_arguments: points to the Tyr_mkindex_options of this invocation.
 * err:            error object handed to the called functions.
 * argc, argv and parsed_args are not used.
 *
 * Returns 0 on success and -1 if one of the two calls reported a failure.
 */
static int gt_tyr_mkindex_runner(GT_UNUSED int argc,
                                 GT_UNUSED const char **argv,
                                 GT_UNUSED int parsed_args,
                                 void *tool_arguments,
                                 GtError *err)
{
  Tyr_mkindex_options *opts = tool_arguments;
  Verboseinfo *vinfo = newverboseinfo(opts->verbose);
  int status = 0;

  if (opts->verbose)
  {
    printf("# mersize=%lu\n",opts->mersize);

    if (opts->userdefinedminocc > 0)
    {
      printf("# minocc=%lu\n",opts->userdefinedminocc);
    }
    else
    {
      printf("# minocc=undefined\n");
    }

    if (opts->userdefinedmaxocc > 0)
    {
      printf("# maxocc=%lu\n",opts->userdefinedmaxocc);
    }
    else
    {
      printf("# maxocc=undefined\n");
    }

    printf("# prefixlength=");
    if (opts->prefixlength.flag == Autoprefixlength)
    {
      printf("automatic");
    }
    else if (opts->prefixlength.flag == Determinedprefixlength)
    {
      printf("%u",opts->prefixlength.value);
    }
    else
    {
      printf("undefined");
    }
    printf("\n");

    if (gt_str_length(opts->str_storeindex) > 0)
    {
      printf("# storeindex=%s\n",gt_str_get(opts->str_storeindex));
    }
    printf("# inputindex=%s\n",gt_str_get(opts->str_inputindex));
  }

  if (merstatistics(opts->str_inputindex,
                    opts->mersize,
                    opts->userdefinedminocc,
                    opts->userdefinedmaxocc,
                    opts->str_storeindex,
                    opts->storecounts,
                    opts->scanfile,
                    opts->performtest,
                    vinfo,
                    err) != 0)
  {
    status = -1;
  }
  else if (gt_str_length(opts->str_storeindex) > 0 &&
           opts->prefixlength.flag != Undeterminedprefixlength)
  {
    Definedunsignedint bucketprefix;

    /* the value is only passed on when it was explicitly determined */
    if (opts->prefixlength.flag == Determinedprefixlength)
    {
      bucketprefix.defined = true;
      bucketprefix.valueunsignedint = opts->prefixlength.value;
    }
    else
    {
      bucketprefix.defined = false;
    }
    if (constructmerbuckets(opts->str_storeindex,&bucketprefix,err) != 0)
    {
      status = -1;
    }
  }

  freeverboseinfo(&vinfo);
  return status;
}
/*
 * Runs local alignment searches of all query sequences against an index.
 *
 * Depending on the options the queries are processed "online" with
 * multiapplysmithwaterman() on the mapped encoded sequence, index based
 * with indexbasedlocali() on a Genericindex, or both. With docompare set,
 * the matches of both methods are stored and checked against each other
 * by checkandresetstorematch() after every query; otherwise matches are
 * reported through showmatch.
 *
 * idxlocalioptions: index name, query files, scoring values and mode flags.
 * err:              error object handed to the loading functions and the
 *                   sequence iterator.
 *
 * Returns 0 on success and -1 if the index/sequence could not be loaded,
 * the sequence iterator could not be created, or reading a query failed.
 */
int runidxlocali(const IdxlocaliOptions *idxlocalioptions,GtError *err)
{
  Genericindex *genericindex = NULL;
  bool haserr = false;
  Verboseinfo *verboseinfo;
  const Encodedsequence *encseq = NULL;

  verboseinfo = newverboseinfo(idxlocalioptions->verbose);

  if (idxlocalioptions->doonline)
  {
    encseq = mapencodedsequence (true,
                                 idxlocalioptions->indexname,
                                 true,
                                 false,
                                 false,
                                 true,
                                 verboseinfo,
                                 err);
    if (encseq == NULL)
    {
      haserr = true;
    }
  } else
  {
    genericindex = genericindex_new(idxlocalioptions->indexname,
                                    idxlocalioptions->withesa,
                                    idxlocalioptions->withesa ||
                                    idxlocalioptions->docompare,
                                    false,
                                    true,
                                    0,
                                    verboseinfo,
                                    err);
    if (genericindex == NULL)
    {
      haserr = true;
    } else
    {
      encseq = genericindex_getencseq(genericindex);
    }
  }
  if (!haserr)
  {
    GtSeqIterator *seqit;
    const GtUchar *query;
    unsigned long querylen;
    char *desc = NULL;
    int retval;
    Limdfsresources *limdfsresources = NULL;
    const AbstractDfstransformer *dfst;
    SWdpresource *swdpresource = NULL;
    Showmatchinfo showmatchinfo;
    Processmatch processmatch;
    void *processmatchinfoonline, *processmatchinfooffline;
    Storematchinfo storeonline, storeoffline;

    if (idxlocalioptions->docompare)
    {
      processmatch = storematch;
      initstorematch(&storeonline,encseq);
      initstorematch(&storeoffline,encseq);
      processmatchinfoonline = &storeonline;
      processmatchinfooffline = &storeoffline;
    } else
    {
      processmatch = showmatch;
      showmatchinfo.encseq = encseq;
      showmatchinfo.characters = getencseqAlphabetcharacters(encseq);
      showmatchinfo.wildcardshow = getencseqAlphabetwildcardshow(encseq);
      showmatchinfo.showalignment = idxlocalioptions->showalignment;
      processmatchinfoonline = processmatchinfooffline = &showmatchinfo;
    }
    if (idxlocalioptions->doonline || idxlocalioptions->docompare)
    {
      swdpresource = newSWdpresource(idxlocalioptions->matchscore,
                                     idxlocalioptions->mismatchscore,
                                     idxlocalioptions->gapextend,
                                     idxlocalioptions->threshold,
                                     idxlocalioptions->showalignment,
                                     processmatch,
                                     processmatchinfoonline);
    }
    dfst = locali_AbstractDfstransformer();
    if (!idxlocalioptions->doonline || idxlocalioptions->docompare)
    {
      gt_assert(genericindex != NULL);
      limdfsresources = newLimdfsresources(genericindex,
                                           true,
                                           0,
                                           0,    /* maxpathlength */
                                           true, /* keepexpandedonstack */
                                           processmatch,
                                           processmatchinfooffline,
                                           NULL, /* processresult */
                                           NULL, /* processresult info */
                                           dfst);
    }
    seqit = gt_seqiterator_new(idxlocalioptions->queryfiles, err);
    if (!seqit)
    {
      haserr = true;
    }
    if (!haserr)
    {
      gt_seqiterator_set_symbolmap(seqit, getencseqAlphabetsymbolmap(encseq));
      /* queryunit doubles as the running query counter, also in compare
         mode where the other showmatchinfo fields are not used */
      for (showmatchinfo.queryunit = 0; /* Nothing */;
           showmatchinfo.queryunit++)
      {
        retval = gt_seqiterator_next(seqit,
                                     &query,
                                     &querylen,
                                     &desc,
                                     err);
        if (retval < 0)
        {
          haserr = true;
          break;
        }
        if (retval == 0)
        {
          break;
        }
        printf("process sequence " Formatuint64_t " of length %lu\n",
               PRINTuint64_tcast(showmatchinfo.queryunit),querylen);
        if (idxlocalioptions->doonline || idxlocalioptions->docompare)
        {
          multiapplysmithwaterman(swdpresource,encseq,query,querylen);
        }
        if (!idxlocalioptions->doonline || idxlocalioptions->docompare)
        {
          indexbasedlocali(limdfsresources,
                           idxlocalioptions->matchscore,
                           idxlocalioptions->mismatchscore,
                           idxlocalioptions->gapstart,
                           idxlocalioptions->gapextend,
                           idxlocalioptions->threshold,
                           query,
                           querylen,
                           dfst);
        }
        if (idxlocalioptions->docompare)
        {
          checkandresetstorematch(showmatchinfo.queryunit,
                                  &storeonline,&storeoffline);
        }
        gt_free(desc);
      }
    }
    /* Released outside the block above so that the search resources are
       also freed when the sequence iterator could not be created. */
    if (limdfsresources != NULL)
    {
      freeLimdfsresources(&limdfsresources,dfst);
    }
    if (swdpresource != NULL)
    {
      freeSWdpresource(swdpresource);
      swdpresource = NULL;
    }
    if (seqit != NULL)
    {
      gt_seqiterator_delete(seqit);
    }
    if (idxlocalioptions->docompare)
    {
      freestorematch(&storeonline);
      freestorematch(&storeoffline);
    }
  }
  if (genericindex != NULL)
  {
    /* encseq was obtained from the generic index and is not freed
       separately */
    genericindex_delete(genericindex);
  } else if (encseq != NULL)
  {
    /* encseq is NULL when loading failed; then there is nothing to free
       and the function must still return -1 instead of asserting */
    encodedsequence_free((Encodedsequence **) &encseq);
  }
  freeverboseinfo(&verboseinfo);
  return haserr ? -1 : 0;
}