/*
  Fill <suffixarray> with the tables of index <indexname> selected by the
  SARR_* bits in <demand>.

  The encoded sequence is always loaded via mapencodedsequence() and the
  project keys are read via scanprjfileuintkeys(). Each further table
  (suftab, lcptab together with the large-lcp table, bwttab, bcktab) is
  then either mapped with genericmaptable()/mapbcktab() when <map> is true,
  or opened as a buffered stream when <map> is false. The bucket table has
  no streaming variant; requesting it with <map> == false is an error.

  Returns 0 on success. On any failure freesuffixarray() is called on
  <suffixarray> and -1 is returned.

  NOTE(review): INITBufferedfile is a macro defined outside this function.
  It is passed neither haserr nor err, yet haserr is tested right after it,
  so it presumably refers to both locals by name -- do not rename them
  without checking the macro.
*/
static int inputsuffixarray(bool map,
                            Suffixarray *suffixarray,
                            unsigned int demand,
                            const GtStr *indexname,
                            Verboseinfo *verboseinfo,
                            GtError *err)
{
  bool haserr = false;
  Seqpos seqlength = 0;
  unsigned long numofentries;
  const bool wantsuftab = (demand & SARR_SUFTAB) != 0;
  const bool wantlcptab = (demand & SARR_LCPTAB) != 0;
  const bool wantbwttab = (demand & SARR_BWTTAB) != 0;
  const bool wantbcktab = (demand & SARR_BCKTAB) != 0;

  gt_error_check(err);
  initsuffixarray(suffixarray);

  /* encoded sequence first: every table size below derives from its length */
  suffixarray->encseq = mapencodedsequence(true,
                                           indexname,
                                           (demand & SARR_ESQTAB) != 0,
                                           (demand & SARR_DESTAB) != 0,
                                           (demand & SARR_SDSTAB) != 0,
                                           (demand & SARR_SSPTAB) != 0,
                                           verboseinfo,
                                           err);
  if (suffixarray->encseq != NULL)
  {
    seqlength = getencseqtotallength(suffixarray->encseq);
    haserr = scanprjfileuintkeys(suffixarray, indexname, verboseinfo, err);
  } else
  {
    haserr = true;
  }
  /* suftab, lcptab and bwttab all hold totallength + 1 entries */
  numofentries = (unsigned long) (seqlength + 1);

  /* suffix table */
  if (!haserr && wantsuftab)
  {
    if (!map)
    {
      INITBufferedfile(indexname, &suffixarray->suftabstream, Seqpos,
                       SUFTABSUFFIX);
    } else
    {
      suffixarray->suftab = genericmaptable(indexname,
                                            SUFTABSUFFIX,
                                            numofentries,
                                            sizeof (Seqpos),
                                            err);
      if (suffixarray->suftab == NULL)
      {
        haserr = true;
      }
    }
    if (!haserr && !suffixarray->longest.defined)
    {
      gt_error_set(err, "longest not defined");
      haserr = true;
    }
  }

  /* lcp table, plus the table of large lcp values when there are any */
  if (!haserr && wantlcptab)
  {
    if (!map)
    {
      INITBufferedfile(indexname, &suffixarray->lcptabstream, GtUchar,
                       LCPTABSUFFIX);
      /* position the stream one GtUchar past the start of the file */
      if (!haserr &&
          fseek(suffixarray->lcptabstream.fp, (long) sizeof (GtUchar),
                SEEK_SET))
      {
        gt_error_set(err, "fseek(esastream) failed: %s", strerror(errno));
        haserr = true;
      }
    } else
    {
      suffixarray->lcptab = genericmaptable(indexname,
                                            LCPTABSUFFIX,
                                            numofentries,
                                            sizeof (GtUchar),
                                            err);
      if (suffixarray->lcptab == NULL)
      {
        haserr = true;
      }
    }
    if (!haserr && !suffixarray->numoflargelcpvalues.defined)
    {
      gt_error_set(err, "numoflargelcpvalues not defined");
      haserr = true;
    }
    if (!haserr && suffixarray->numoflargelcpvalues.valueseqpos > 0)
    {
      if (!map)
      {
        INITBufferedfile(indexname, &suffixarray->llvtabstream,
                         Largelcpvalue, LARGELCPTABSUFFIX);
      } else
      {
        suffixarray->llvtab
          = genericmaptable(indexname,
                            LARGELCPTABSUFFIX,
                            (unsigned long)
                            suffixarray->numoflargelcpvalues.valueseqpos,
                            sizeof (Largelcpvalue),
                            err);
        if (suffixarray->llvtab == NULL)
        {
          haserr = true;
        }
      }
    }
  }

  /* BWT table */
  if (!haserr && wantbwttab)
  {
    if (!map)
    {
      INITBufferedfile(indexname, &suffixarray->bwttabstream, GtUchar,
                       BWTTABSUFFIX);
    } else
    {
      suffixarray->bwttab = genericmaptable(indexname,
                                            BWTTABSUFFIX,
                                            numofentries,
                                            sizeof (GtUchar),
                                            err);
      if (suffixarray->bwttab == NULL)
      {
        haserr = true;
      }
    }
  }

  /* bucket table: can only be mapped, never streamed */
  if (!haserr && wantbcktab)
  {
    if (!map)
    {
      gt_error_set(err, "cannot stream bcktab");
      haserr = true;
    } else
    {
      suffixarray->bcktab
        = mapbcktab(indexname,
                    getencseqAlphabetnumofchars(suffixarray->encseq),
                    suffixarray->prefixlength,
                    err);
      if (suffixarray->bcktab == NULL)
      {
        haserr = true;
      }
    }
  }

  if (!haserr)
  {
    return 0;
  }
  /* release whatever was loaded before the failure */
  freesuffixarray(suffixarray);
  return -1;
}
/*
  Tool entry point: cross-checks pattern searches on a packed (BWT) index
  against mmsearch on the suffix array of the same project.

  Steps, in order:
    1. parse the options with parseChkBWTOptions(); the project name is
       taken from argv[parsedArgs],
    2. obtain the BWT sequence index and run BWTSeqVerifyIntegrity() on it,
    3. map the suffix array with the SARR_SUFTAB | SARR_ESQTAB demand,
    4. settle the pattern length range (negative option values are replaced
       by values derived from recommendedprefixlength()),
    5. for params.numOfSamples patterns from the pattern iterator, compare
       the matches of both indices: positions one by one if the BWT index
       has locate information, match counts otherwise.

  Progress and diagnostics are written to stderr.

  Returns 0 on success (including when the option parser requests an exit)
  and -1 on error; in the error case a message is stored in <err>, except
  for the internal error after a failed reinitEMIterator(), which aborts.
*/
extern int gt_packedindex_chk_search(int argc, const char *argv[],
                                     GtError *err)
{
  struct chkSearchOptions params;
  Suffixarray suffixarray;
  Enumpatterniterator *epi = NULL;
  bool saIsLoaded = false;
  BWTSeq *bwtSeq = NULL;
  GtStr *inputProject = NULL;
  int parsedArgs;
  bool had_err = false;
  BWTSeqExactMatchesIterator EMIter;
  bool EMIterInitialized = false;
  Verboseinfo *verbosity = NULL;

  inputProject = gt_str_new();
  /* do { ... } while (0): every "break" at this level jumps to the common
     cleanup code behind the loop */
  do
  {
    gt_error_check(err);
    {
      bool exitNow = false;
      switch (parseChkBWTOptions(&parsedArgs, argc, argv, &params,
                                 inputProject, err))
      {
        case OPTIONPARSER_OK:
          break;
        case OPTIONPARSER_ERROR:
          had_err = true;
          exitNow = true;
          break;
        case OPTIONPARSER_REQUESTS_EXIT:
          exitNow = true;
          break;
      }
      if (exitNow)
        break;
    }
    gt_str_set(inputProject, argv[parsedArgs]);
    verbosity = newverboseinfo(params.verboseOutput);
    bwtSeq = availBWTSeq(&params.idx.final, verbosity, err);
    if ((had_err = bwtSeq == NULL))
      break;
    {
      enum verifyBWTSeqErrCode retval =
        BWTSeqVerifyIntegrity(bwtSeq, inputProject, params.flags,
                              params.progressInterval, stderr, verbosity,
                              err);
      if ((had_err = (retval != VERIFY_BWTSEQ_NO_ERROR)))
      {
        fprintf(stderr, "index integrity check failed: %s\n",
                gt_error_get(err));
        gt_error_set(err, "aborted because of index integrity check fail");
        break;
      }
    }
    if (BWTSeqHasLocateInformation(bwtSeq))
    {
      if ((had_err = !initEmptyEMIterator(&EMIter, bwtSeq)))
      {
        gt_error_set(err,
                     "Cannot create matches iterator for sequence index.");
        break;
      }
      EMIterInitialized = true;
    }
    {
      Seqpos totalLen, dbstart;
      unsigned long trial, patternLen;

      if ((had_err = mapsuffixarray(&suffixarray,
                                    SARR_SUFTAB | SARR_ESQTAB,
                                    inputProject, NULL, err) != 0))
      {
        gt_error_set(err, "Can't load suffix array project with"
                     " demand for encoded sequence and suffix table files\n");
        break;
      }
      totalLen = getencseqtotallength(suffixarray.encseq);
      saIsLoaded = true;
      if ((had_err = (params.minPatLen >= 0L && params.maxPatLen >= 0L
                      && params.minPatLen > params.maxPatLen)))
      {
        gt_error_set(err,
                     "Invalid pattern lengths selected: min=%ld, max=%ld;"
                     " min <= max is required.",
                     params.minPatLen, params.maxPatLen);
        break;
      }
      /* a negative length means "not chosen": derive it from the
         recommended prefix length */
      if (params.minPatLen < 0 || params.maxPatLen < 0)
      {
        unsigned int numofchars
          = getencseqAlphabetnumofchars(suffixarray.encseq);
        if (params.minPatLen < 0)
          params.minPatLen = recommendedprefixlength(numofchars, totalLen);
        if (params.maxPatLen < 0)
          params.maxPatLen
            = MAX(params.minPatLen,
                  125 * recommendedprefixlength(numofchars, totalLen) / 100);
        else
          params.maxPatLen = MAX(params.maxPatLen, params.minPatLen);
      }
      /* %ld, as in the message above: the lengths are compared against 0L
         and may be negative before this point, i.e. they are signed */
      fprintf(stderr, "Using patterns of lengths %ld to %ld\n",
              params.minPatLen, params.maxPatLen);
      if ((had_err = totalLen + 1 != BWTSeqLength(bwtSeq)))
      {
        gt_error_set(err, "base suffix array and index have diferrent lengths!"
                     FormatSeqpos" vs. "FormatSeqpos,
                     totalLen + 1, BWTSeqLength(bwtSeq));
        break;
      }
      if ((had_err = (epi = newenumpatterniterator(params.minPatLen,
                                                   params.maxPatLen,
                                                   suffixarray.encseq,
                                                   err)) == NULL))
      {
        fputs("Creation of pattern iterator failed!\n", stderr);
        break;
      }
      for (trial = 0; !had_err && trial < params.numOfSamples; ++trial)
      {
        const GtUchar *pptr = nextEnumpatterniterator(&patternLen, epi);
        MMsearchiterator *mmsi =
          newmmsearchiterator(suffixarray.encseq,
                              suffixarray.suftab,
                              0,  /* leftbound */
                              totalLen, /* rightbound */
                              0, /* offset */
                              suffixarray.readmode,
                              pptr,
                              patternLen);
        if (BWTSeqHasLocateInformation(bwtSeq))
        {
          Seqpos numMatches;
          if ((had_err = !reinitEMIterator(&EMIter, bwtSeq, pptr, patternLen,
                                           false)))
          {
            fputs("Internal error: failed to reinitialize pattern match"
                  " iterator", stderr);
            abort();
          }
          numMatches = EMINumMatchesTotal(&EMIter);
          gt_assert(numMatches == BWTSeqMatchCount(bwtSeq, pptr, patternLen,
                                                   false));
          gt_assert(EMINumMatchesTotal(&EMIter)
                    == countmmsearchiterator(mmsi));
          /* walk both match lists in lock step */
          while (nextmmsearchiterator(&dbstart, mmsi))
          {
            Seqpos matchPos = 0;
            bool match = EMIGetNextMatch(&EMIter, &matchPos, bwtSeq);
            if ((had_err = !match))
            {
              gt_error_set(err,
                           "matches of packedindex expired before mmsearch!");
              break;
            }
            if ((had_err = matchPos != dbstart))
            {
              gt_error_set(err, "packedindex match doesn't equal mmsearch "
                           "match result!\n"FormatSeqpos" vs. "FormatSeqpos"\n",
                           matchPos, dbstart);
              /* stop here: another iteration would overwrite had_err and
                 could hide the mismatch that was just reported */
              break;
            }
          }
          if (!had_err)
          {
            /* mmsearch is exhausted, so the packed index must be as well */
            Seqpos matchPos;
            bool trailingMatch = EMIGetNextMatch(&EMIter, &matchPos, bwtSeq);
            if ((had_err = trailingMatch))
            {
              /* no break here: the iterator still has to be freed below */
              gt_error_set(err,
                           "matches of mmsearch expired before fmindex!");
            }
          }
        }
        else
        {
          Seqpos numFMIMatches = BWTSeqMatchCount(bwtSeq, pptr, patternLen,
                                                  false),
            numMMSearchMatches = countmmsearchiterator(mmsi);
          if ((had_err = numFMIMatches != numMMSearchMatches))
          {
            /* argument order follows the message: the mmsearch count comes
               from the suffix table, the BWTSeq count is the fmindex one */
            gt_error_set(err, "Number of matches not equal for suffix array ("
                         FormatSeqpos") and fmindex ("FormatSeqpos".\n",
                         numMMSearchMatches, numFMIMatches);
          }
        }
        /* always release the per-trial iterator, also on failure */
        freemmsearchiterator(&mmsi);
        /* leave before ++trial so that the failed trial is not counted as
           finished successfully in the summary below */
        if (had_err)
          break;
        if (params.progressInterval && !((trial + 1) % params.progressInterval))
          putc('.', stderr);
      }
      if (params.progressInterval)
        putc('\n', stderr);
      fprintf(stderr, "Finished %lu of %lu matchings successfully.\n",
              trial, params.numOfSamples);
    }
  } while (0);
  /* common cleanup: release only what was actually acquired */
  if (EMIterInitialized)
    destructEMIterator(&EMIter);
  if (saIsLoaded)
    freesuffixarray(&suffixarray);
  if (epi)
    freeEnumpatterniterator(&epi);
  if (bwtSeq)
    deleteBWTSeq(bwtSeq);
  if (verbosity)
    freeverboseinfo(&verbosity);
  if (inputProject)
    gt_str_delete(inputProject);
  return had_err ? -1 : 0;
}