/**
 * Look for a locate mark stored for BWT position pos in the variable-width
 * extension bits of the block containing pos (count-prefixed mark layout).
 *
 * The extension bits are fetched into extBits. The variable part starts
 * with a mark count, followed by that many records, each holding a
 * block-relative BWT position and then an original-sequence position value.
 * Records are scanned in order and the scan stops at the first stored
 * position greater than the one searched for.
 *
 * @param bwtSeq  index to query
 * @param pos     BWT position to look up
 * @param extBits retrieval buffer, filled by EISRetrieveExtraBits
 * @return bit offset (within extBits->varPart) of the original-position
 *         field of the matching record, or 0 if pos carries no mark
 */
static inline BitOffset
searchLocateCountMark(const BWTSeq *bwtSeq, Seqpos pos,
                      struct extBitsRetrieval *extBits)
{
  unsigned markIdx, markCount, countFieldBits, bwtPosBits, origPosBits;
  BitOffset cursor;
  Seqpos relPos, maxOrigPos;

  EISRetrieveExtraBits(bwtSeq->seqIdx, pos,
                       EBRF_RETRIEVE_CWBITS | EBRF_RETRIEVE_VARBITS,
                       extBits, bwtSeq->hint);
  cursor = extBits->varOffset;
  countFieldBits = requiredSeqposBits(extBits->len);
  markCount = gt_bsGetSeqpos(extBits->varPart, cursor, countFieldBits);
  if (!markCount)
    return 0;

  bwtPosBits = requiredSeqposBits(extBits->len - 1);
  maxOrigPos = (bwtSeq->featureToggles & BWTReversiblySorted)
    ? (BWTSeqLength(bwtSeq) - 1) / bwtSeq->locateSampleInterval
    : BWTSeqLength(bwtSeq) - 1;
  origPosBits = requiredSeqposBits(maxOrigPos);
  relPos = pos - extBits->start;
  /* skip the count field; cursor now addresses the first record */
  cursor += countFieldBits;
  for (markIdx = 0; markIdx < markCount; ++markIdx)
  {
    Seqpos markedPos = gt_bsGetSeqpos(extBits->varPart, cursor, bwtPosBits);
    if (markedPos == relPos)
      return cursor + bwtPosBits;
    if (markedPos > relPos)
      break;
    /* stored position is still smaller: advance to the next record */
    cursor += bwtPosBits + origPosBits;
  }
  return 0;
}
static inline BitOffset locateVarBits(const BWTSeq *bwtSeq, struct extBitsRetrieval *extBits) { BitOffset numLocBits = 0; unsigned bitsPerBWTPos = requiredSeqposBits(extBits->len - 1), bitsPerOrigPos = requiredSeqposBits( ((bwtSeq->featureToggles & BWTReversiblySorted) ? (BWTSeqLength(bwtSeq) - 1) / bwtSeq->locateSampleInterval : BWTSeqLength(bwtSeq) - 1)); if (bwtSeq->featureToggles & BWTLocateBitmap) { unsigned numMarks = gt_bs1BitsCount(extBits->cwPart, extBits->cwOffset, extBits->len); numLocBits = numMarks * bitsPerOrigPos; } else if (bwtSeq->featureToggles & BWTLocateCount) { BitOffset markOffset = extBits->varOffset; unsigned bitsPerCount = requiredSeqposBits(extBits->len); unsigned numMarks = gt_bsGetSeqpos(extBits->varPart, markOffset, bitsPerCount); numLocBits = bitsPerCount + numMarks * (bitsPerBWTPos + bitsPerOrigPos); } return numLocBits; }
/**
 * Report the length stored in a packed index handed over as an opaque
 * FMindex pointer.
 *
 * The pointer is reinterpreted as a BWTSeq; the result is the BWT sequence
 * length minus one. The BWT length is asserted to be non-zero.
 *
 * @param fmindex packed index, passed as const FMindex *
 * @return BWTSeqLength of the index minus one
 */
GtUword gt_voidpackedindex_totallength_get(const FMindex *fmindex)
{
  const BWTSeq *packedIndex = (const BWTSeq *) fmindex;
  GtUword lengthOfBWT = BWTSeqLength(packedIndex);

  gt_assert(lengthOfBWT > 0);
  return lengthOfBWT - 1;
}
/**
 * Report the length stored in a packed index handed over as an opaque
 * FMindex pointer.
 *
 * The pointer is reinterpreted as a BWTSeq; the result is the BWT sequence
 * length minus one. The BWT length is asserted to be non-zero.
 *
 * @param fmindex packed index, passed as const FMindex *
 * @return BWTSeqLength of the index minus one
 */
unsigned long gt_voidpackedindex_totallength_get(const FMindex *fmindex)
{
  const BWTSeq *packedIndex = (const BWTSeq *) fmindex;
  unsigned long lengthOfBWT = BWTSeqLength(packedIndex);

  gt_assert(lengthOfBWT > 0);
  return lengthOfBWT - 1;
}
extern SASeqSrc * BWTSeqNewSASeqSrc(const BWTSeq *bwtSeq, const BWTSeqContextRetriever *ctxMap) { struct BWTSASeqSrc *newBWTSASeqSrc; gt_assert(bwtSeq); newBWTSASeqSrc = gt_malloc(sizeof (*newBWTSASeqSrc)); { RandomSeqAccessor origSeqAccess; if (ctxMap) { origSeqAccess.accessFunc = BWTSASSAccessOrigSeq; origSeqAccess.state = (void *)ctxMap; } else { origSeqAccess.accessFunc = NULL; origSeqAccess.state = NULL; } initSASeqSrc(&newBWTSASeqSrc->baseClass, BWTSeqLength(bwtSeq), NULL, BWTSASSCreateReader, BWTSASSGetRot0Pos, NULL, origSeqAccess, deleteBWTSeqSASS, BWTSASSNewMRAEnc, NULL, NULL); /* since BWTSeq can regenerate * arbitrary portions of all * associated data (when reversibly * sorted) no generator is necessary * and all readers just keep their state */ } newBWTSASeqSrc->ctxMap = ctxMap; newBWTSASeqSrc->bwtSeq = bwtSeq; newBWTSASeqSrc->readerStateList = NULL; return &newBWTSASeqSrc->baseClass; }
/**
 * Map a BWT position to the corresponding position in the original
 * sequence.
 *
 * Starting at pos, the LF-mapping is applied until a position with stored
 * locate information is reached. The stored value is then read (and
 * multiplied by the locate sample interval if the index is reversibly
 * sorted), and the number of LF steps taken is added.
 *
 * Two layouts are handled, chosen by the feature toggles: BWTLocateBitmap
 * (marks flagged in the constant-width part) and BWTLocateCount
 * (count-prefixed records in the variable part). If neither toggle is set,
 * the index has no locate information and the function calls abort().
 *
 * @param bwtSeq  index to query
 * @param pos     BWT position to locate
 * @param extBits retrieval buffer reused across the lookups
 * @return position in the original sequence
 */
extern Seqpos
BWTSeqLocateMatch(const BWTSeq *bwtSeq, Seqpos pos,
                  struct extBitsRetrieval *extBits)
{
  if (bwtSeq->featureToggles & BWTLocateBitmap)
  {
    Seqpos cur = pos, maxPosVal, result;
    unsigned stepsWalked = 0;
    unsigned bitsPerCount, bitsPerBWTPos, bitsPerOrigPos;
    unsigned bwtPosFieldBits, countFieldBits;
    BitOffset recordIndex, recordOffset;

    while (!BWTSeqPosHasLocateInfo(bwtSeq, cur, extBits))
    {
      cur = BWTSeqLFMap(bwtSeq, cur, extBits);
      ++stepsWalked;
    }
    EISRetrieveExtraBits(bwtSeq->seqIdx, cur,
                         EBRF_RETRIEVE_CWBITS | EBRF_RETRIEVE_VARBITS,
                         extBits, bwtSeq->hint);

    maxPosVal = (bwtSeq->featureToggles & BWTReversiblySorted)
      ? (BWTSeqLength(bwtSeq) - 1) / bwtSeq->locateSampleInterval
      : BWTSeqLength(bwtSeq) - 1;
    bitsPerCount = requiredSeqposBits(extBits->len);
    bitsPerBWTPos = requiredSeqposBits(extBits->len - 1);
    bitsPerOrigPos = requiredSeqposBits(maxPosVal);
    /* the count field and the per-record BWT position field exist only
     * when the count layout is stored alongside the bitmap */
    if (bwtSeq->featureToggles & BWTLocateCount)
    {
      bwtPosFieldBits = bitsPerBWTPos;
      countFieldBits = bitsPerCount;
    }
    else
    {
      bwtPosFieldBits = 0;
      countFieldBits = 0;
    }
    /* number of marks set before cur inside its block */
    recordIndex = gt_bs1BitsCount(extBits->cwPart, extBits->cwOffset,
                                  cur - extBits->start);
    recordOffset = (bwtPosFieldBits + bitsPerOrigPos) * recordIndex
      + countFieldBits;
    result = gt_bsGetSeqpos(extBits->varPart,
                            extBits->varOffset + recordOffset
                            + bwtPosFieldBits,
                            bitsPerOrigPos);
    if (bwtSeq->featureToggles & BWTReversiblySorted)
      result *= bwtSeq->locateSampleInterval;
    result += stepsWalked;
    gt_assert(!(bwtSeq->featureToggles & BWTLocateCount)
              || gt_bsGetSeqpos(extBits->varPart,
                                extBits->varOffset + recordOffset,
                                bitsPerBWTPos) == cur - extBits->start);
    return result;
  }

  if (bwtSeq->featureToggles & BWTLocateCount)
  {
    BitOffset markOffset;
    Seqpos cur = pos, result;
    /* mark is at most locateInterval positions away */
    unsigned stepsWalked = 0;
    Seqpos maxPosVal = (bwtSeq->featureToggles & BWTReversiblySorted)
      ? (BWTSeqLength(bwtSeq) - 1) / bwtSeq->locateSampleInterval
      : BWTSeqLength(bwtSeq) - 1;
    unsigned bitsPerOrigPos = requiredSeqposBits(maxPosVal);

    for (;;)
    {
      markOffset = searchLocateCountMark(bwtSeq, cur, extBits);
      if (markOffset != 0)
        break;
      cur = BWTSeqLFMap(bwtSeq, cur, extBits);
      ++stepsWalked;
    }
    result = gt_bsGetSeqpos(extBits->varPart, markOffset, bitsPerOrigPos);
    if (bwtSeq->featureToggles & BWTReversiblySorted)
      result *= bwtSeq->locateSampleInterval;
    result += stepsWalked;
    return result;
  }

  /* Internal error: Trying to locate in BWT sequence index without locate
   * information. */
  abort();
  return 0; /* shut up compiler */
}
/**
 * Tool entry point: cross-check pattern searches in a packed (BWT) index
 * against searches in the reference suffix array of the same project.
 *
 * After option parsing the index is loaded and passed to
 * gt_BWTSeqVerifyIntegrity. Then params.numOfSamples patterns are enumerated
 * from the encoded sequence. For each pattern the matches found via the
 * suffix array (mmsearch) are compared with those of the packed index:
 * position by position if the index has locate information, otherwise by
 * match count only. Progress dots and a summary are written to stderr.
 *
 * The first failing trial stops the run. The per-trial mmsearch iterator is
 * released on every path, and the final summary counts only trials that
 * completed without error.
 *
 * @param argc number of command line arguments
 * @param argv command line arguments
 * @param err  error object; must be unset on entry (gt_error_check)
 * @return 0 on success (or when the option parser requests exit), -1 on
 *         error
 */
extern int
gt_packedindex_chk_search(int argc, const char *argv[], GtError *err)
{
  struct chkSearchOptions params;
  Suffixarray suffixarray;
  Enumpatterniterator *epi = NULL;
  bool saIsLoaded = false;
  BWTSeq *bwtSeq = NULL;
  GtStr *inputProject = NULL;
  int parsedArgs;
  bool had_err = false;
  BWTSeqExactMatchesIterator EMIter;
  bool EMIterInitialized = false;
  GtLogger *logger = NULL;
  inputProject = gt_str_new();

  /* single-pass loop: "break" jumps to the common cleanup below */
  do
  {
    gt_error_check(err);
    {
      bool exitNow = false;
      switch (parseChkBWTOptions(&parsedArgs, argc, argv, &params,
                                 inputProject, err))
      {
        case GT_OPTION_PARSER_OK:
          break;
        case GT_OPTION_PARSER_ERROR:
          had_err = true;
          exitNow = true;
          break;
        case GT_OPTION_PARSER_REQUESTS_EXIT:
          exitNow = true;
          break;
      }
      if (exitNow)
        break;
    }
    gt_str_set(inputProject, argv[parsedArgs]);

    logger = gt_logger_new(params.verboseOutput,
                           GT_LOGGER_DEFLT_PREFIX, stdout);

    bwtSeq = gt_availBWTSeq(&params.idx.final, logger, err);
    if ((had_err = bwtSeq == NULL))
      break;

    {
      enum verifyBWTSeqErrCode retval =
        gt_BWTSeqVerifyIntegrity(bwtSeq, gt_str_get(inputProject),
                                 params.flags, params.progressInterval,
                                 stderr, logger, err);
      if ((had_err = (retval != VERIFY_BWTSEQ_NO_ERROR)))
      {
        fprintf(stderr, "index integrity check failed: %s\n",
                gt_error_get(err));
        gt_error_set(err, "aborted because of index integrity check fail");
        break;
      }
    }
    if (BWTSeqHasLocateInformation(bwtSeq))
    {
      if ((had_err = !gt_initEmptyEMIterator(&EMIter, bwtSeq)))
      {
        gt_error_set(err, "Cannot create matches iterator for sequence index.");
        break;
      }
      EMIterInitialized = true;
    }
    {
      unsigned long totalLen, dbstart;
      unsigned long trial, patternLen;

      if ((had_err =
           gt_mapsuffixarray(&suffixarray, SARR_SUFTAB | SARR_ESQTAB,
                             gt_str_get(inputProject), NULL, err) != 0))
      {
        gt_error_set(err, "Can't load suffix array project with"
                     " demand for encoded sequence and suffix table files\n");
        break;
      }
      totalLen = gt_encseq_total_length(suffixarray.encseq);
      saIsLoaded = true;
      if ((had_err = (params.minPatLen >= 0L && params.maxPatLen >= 0L
                      && params.minPatLen > params.maxPatLen)))
      {
        gt_error_set(err, "Invalid pattern lengths selected: min=%ld, max=%ld;"
                     " min <= max is required.", params.minPatLen,
                     params.maxPatLen);
        break;
      }
      /* negative lengths mean "choose automatically" */
      if (params.minPatLen < 0 || params.maxPatLen < 0)
      {
        unsigned int numofchars
          = gt_alphabet_num_of_chars(
              gt_encseq_alphabet(suffixarray.encseq));
        if (params.minPatLen < 0)
          params.minPatLen
            = gt_recommendedprefixlength(numofchars, totalLen,
                                         GT_RECOMMENDED_MULTIPLIER_DEFAULT,
                                         true);
        if (params.maxPatLen < 0)
          params.maxPatLen =
            MAX(params.minPatLen,
                125 * gt_recommendedprefixlength(numofchars,totalLen,
                                         GT_RECOMMENDED_MULTIPLIER_DEFAULT,
                                         true)/100);
        else
          params.maxPatLen = MAX(params.maxPatLen, params.minPatLen);
      }
      /* the pattern lengths are signed (see the %ld message above) */
      fprintf(stderr, "Using patterns of lengths %ld to %ld\n",
              params.minPatLen, params.maxPatLen);
      if ((had_err = totalLen + 1 != BWTSeqLength(bwtSeq)))
      {
        gt_error_set(err, "base suffix array and index have different"
                     " lengths! %lu vs. %lu",
                     totalLen + 1, BWTSeqLength(bwtSeq));
        break;
      }
      if ((had_err =
           (epi = gt_newenumpatterniterator(params.minPatLen,
                                            params.maxPatLen,
                                            suffixarray.encseq,
                                            err)) == NULL))
      {
        fputs("Creation of pattern iterator failed!\n", stderr);
        break;
      }
      for (trial = 0; !had_err && trial < params.numOfSamples; ++trial)
      {
        const GtUchar *pptr = gt_nextEnumpatterniterator(&patternLen, epi);
        GtMMsearchiterator *mmsi =
          gt_mmsearchiterator_new_complete_olain(suffixarray.encseq,
                                                 suffixarray.suftab,
                                                 0,  /* leftbound */
                                                 totalLen, /* rightbound */
                                                 0, /* offset */
                                                 suffixarray.readmode,
                                                 pptr,
                                                 patternLen);
        if (BWTSeqHasLocateInformation(bwtSeq))
        {
          if ((had_err = !gt_reinitEMIterator(&EMIter, bwtSeq, pptr,
                                              patternLen, false)))
          {
            fputs("Internal error: failed to reinitialize pattern match"
                  " iterator", stderr);
            abort();
          }
          gt_assert(gt_EMINumMatchesTotal(&EMIter)
                    == gt_BWTSeqMatchCount(bwtSeq, pptr, patternLen, false));
          gt_assert(gt_EMINumMatchesTotal(&EMIter)
                    == gt_mmsearchiterator_count(mmsi));
          while (gt_mmsearchiterator_next(&dbstart,mmsi))
          {
            unsigned long matchPos = 0;
            bool match = EMIGetNextMatch(&EMIter, &matchPos, bwtSeq);
            if ((had_err = !match))
            {
              gt_error_set(err,
                           "matches of packedindex expired before mmsearch!");
              break;
            }
            if ((had_err = matchPos != dbstart))
            {
              gt_error_set(err, "packedindex match doesn't equal mmsearch "
                           "match result!\n%lu vs. %lu\n",
                           matchPos, dbstart);
              /* stop here: the next iteration would overwrite had_err */
              break;
            }
          }
          if (!had_err)
          {
            unsigned long matchPos;
            bool trailingMatch = EMIGetNextMatch(&EMIter, &matchPos, bwtSeq);
            if ((had_err = trailingMatch))
              gt_error_set(err,
                           "matches of mmsearch expired before fmindex!");
          }
        }
        else
        {
          unsigned long numFMIMatches
            = gt_BWTSeqMatchCount(bwtSeq, pptr, patternLen, false),
            numMMSearchMatches = gt_mmsearchiterator_count(mmsi);
          if ((had_err = numFMIMatches != numMMSearchMatches))
          {
            gt_error_set(err, "Number of matches not equal for suffix array ("
                         "%lu) and fmindex (%lu).\n",
                         numMMSearchMatches, numFMIMatches);
          }
        }
        /* release the per-trial iterator on every path, then stop on error
         * so that trial counts only the successful matchings */
        gt_mmsearchiterator_delete(mmsi);
        mmsi = NULL;
        if (had_err)
          break;
        if (params.progressInterval
            && !((trial + 1) % params.progressInterval))
          putc('.', stderr);
      }
      if (params.progressInterval)
        putc('\n', stderr);
      fprintf(stderr, "Finished %lu of %lu matchings successfully.\n",
              trial, params.numOfSamples);
    }
  } while (0);
  if (EMIterInitialized)
    gt_destructEMIterator(&EMIter);
  if (saIsLoaded)
    gt_freesuffixarray(&suffixarray);
  gt_freeEnumpatterniterator(epi);
  if (bwtSeq)
    gt_deleteBWTSeq(bwtSeq);
  if (logger)
    gt_logger_delete(logger);
  if (inputProject)
    gt_str_delete(inputProject);
  return had_err?-1:0;
}
/**
 * Tool entry point: cross-check pattern searches in a packed (BWT) index
 * against searches in the reference suffix array of the same project.
 *
 * After option parsing the index is loaded and passed to
 * BWTSeqVerifyIntegrity. Then params.numOfSamples patterns are enumerated
 * from the encoded sequence. For each pattern the matches found via the
 * suffix array (mmsearch) are compared with those of the packed index:
 * position by position if the index has locate information, otherwise by
 * match count only. Progress dots and a summary are written to stderr.
 *
 * The first failing trial stops the run. The per-trial mmsearch iterator is
 * released on every path, and the final summary counts only trials that
 * completed without error.
 *
 * @param argc number of command line arguments
 * @param argv command line arguments
 * @param err  error object; must be unset on entry (gt_error_check)
 * @return 0 on success (or when the option parser requests exit), -1 on
 *         error
 */
extern int
gt_packedindex_chk_search(int argc, const char *argv[], GtError *err)
{
  struct chkSearchOptions params;
  Suffixarray suffixarray;
  Enumpatterniterator *epi = NULL;
  bool saIsLoaded = false;
  BWTSeq *bwtSeq = NULL;
  GtStr *inputProject = NULL;
  int parsedArgs;
  bool had_err = false;
  BWTSeqExactMatchesIterator EMIter;
  bool EMIterInitialized = false;
  Verboseinfo *verbosity = NULL;
  inputProject = gt_str_new();

  /* single-pass loop: "break" jumps to the common cleanup below */
  do
  {
    gt_error_check(err);
    {
      bool exitNow = false;
      switch (parseChkBWTOptions(&parsedArgs, argc, argv, &params,
                                 inputProject, err))
      {
        case OPTIONPARSER_OK:
          break;
        case OPTIONPARSER_ERROR:
          had_err = true;
          exitNow = true;
          break;
        case OPTIONPARSER_REQUESTS_EXIT:
          exitNow = true;
          break;
      }
      if (exitNow)
        break;
    }
    gt_str_set(inputProject, argv[parsedArgs]);

    verbosity = newverboseinfo(params.verboseOutput);

    bwtSeq = availBWTSeq(&params.idx.final, verbosity, err);
    if ((had_err = bwtSeq == NULL))
      break;

    {
      enum verifyBWTSeqErrCode retval =
        BWTSeqVerifyIntegrity(bwtSeq, inputProject, params.flags,
                              params.progressInterval, stderr, verbosity, err);
      if ((had_err = (retval != VERIFY_BWTSEQ_NO_ERROR)))
      {
        fprintf(stderr, "index integrity check failed: %s\n",
                gt_error_get(err));
        gt_error_set(err, "aborted because of index integrity check fail");
        break;
      }
    }
    if (BWTSeqHasLocateInformation(bwtSeq))
    {
      if ((had_err = !initEmptyEMIterator(&EMIter, bwtSeq)))
      {
        gt_error_set(err, "Cannot create matches iterator for sequence index.");
        break;
      }
      EMIterInitialized = true;
    }
    {
      Seqpos totalLen, dbstart;
      unsigned long trial, patternLen;

      if ((had_err =
           mapsuffixarray(&suffixarray, SARR_SUFTAB | SARR_ESQTAB,
                          inputProject, NULL, err) != 0))
      {
        gt_error_set(err, "Can't load suffix array project with"
                     " demand for encoded sequence and suffix table files\n");
        break;
      }
      totalLen = getencseqtotallength(suffixarray.encseq);
      saIsLoaded = true;
      if ((had_err = (params.minPatLen >= 0L && params.maxPatLen >= 0L
                      && params.minPatLen > params.maxPatLen)))
      {
        gt_error_set(err, "Invalid pattern lengths selected: min=%ld, max=%ld;"
                     " min <= max is required.", params.minPatLen,
                     params.maxPatLen);
        break;
      }
      /* negative lengths mean "choose automatically" */
      if (params.minPatLen < 0 || params.maxPatLen < 0)
      {
        unsigned int numofchars
          = getencseqAlphabetnumofchars(suffixarray.encseq);
        if (params.minPatLen < 0)
          params.minPatLen = recommendedprefixlength(numofchars, totalLen);
        if (params.maxPatLen < 0)
          params.maxPatLen =
            MAX(params.minPatLen,
                125 * recommendedprefixlength(numofchars, totalLen) / 100);
        else
          params.maxPatLen = MAX(params.maxPatLen, params.minPatLen);
      }
      /* the pattern lengths are signed (see the %ld message above) */
      fprintf(stderr, "Using patterns of lengths %ld to %ld\n",
              params.minPatLen, params.maxPatLen);
      if ((had_err = totalLen + 1 != BWTSeqLength(bwtSeq)))
      {
        gt_error_set(err, "base suffix array and index have different"
                     " lengths! "FormatSeqpos" vs. "FormatSeqpos,
                     totalLen + 1, BWTSeqLength(bwtSeq));
        break;
      }
      if ((had_err =
           (epi = newenumpatterniterator(params.minPatLen, params.maxPatLen,
                                         suffixarray.encseq,
                                         err)) == NULL))
      {
        fputs("Creation of pattern iterator failed!\n", stderr);
        break;
      }
      for (trial = 0; !had_err && trial < params.numOfSamples; ++trial)
      {
        const GtUchar *pptr = nextEnumpatterniterator(&patternLen, epi);
        MMsearchiterator *mmsi =
          newmmsearchiterator(suffixarray.encseq,
                              suffixarray.suftab,
                              0,  /* leftbound */
                              totalLen, /* rightbound */
                              0, /* offset */
                              suffixarray.readmode,
                              pptr,
                              patternLen);
        if (BWTSeqHasLocateInformation(bwtSeq))
        {
          Seqpos numMatches;
          if ((had_err = !reinitEMIterator(&EMIter, bwtSeq, pptr, patternLen,
                                           false)))
          {
            fputs("Internal error: failed to reinitialize pattern match"
                  " iterator", stderr);
            abort();
          }
          numMatches = EMINumMatchesTotal(&EMIter);
          gt_assert(numMatches == BWTSeqMatchCount(bwtSeq, pptr, patternLen,
                                                   false));
          gt_assert(EMINumMatchesTotal(&EMIter)
                    == countmmsearchiterator(mmsi));
          while (nextmmsearchiterator(&dbstart,mmsi))
          {
            Seqpos matchPos = 0;
            bool match = EMIGetNextMatch(&EMIter, &matchPos, bwtSeq);
            if ((had_err = !match))
            {
              gt_error_set(err,
                           "matches of packedindex expired before mmsearch!");
              break;
            }
            if ((had_err = matchPos != dbstart))
            {
              gt_error_set(err, "packedindex match doesn't equal mmsearch "
                           "match result!\n"FormatSeqpos" vs. "FormatSeqpos"\n",
                           matchPos, dbstart);
              /* stop here: the next iteration would overwrite had_err */
              break;
            }
          }
          if (!had_err)
          {
            Seqpos matchPos;
            bool trailingMatch = EMIGetNextMatch(&EMIter, &matchPos, bwtSeq);
            if ((had_err = trailingMatch))
              gt_error_set(err,
                           "matches of mmsearch expired before fmindex!");
          }
        }
        else
        {
          Seqpos numFMIMatches = BWTSeqMatchCount(bwtSeq, pptr, patternLen,
                                                  false),
            numMMSearchMatches = countmmsearchiterator(mmsi);
          if ((had_err = numFMIMatches != numMMSearchMatches))
          {
            gt_error_set(err, "Number of matches not equal for suffix array ("
                         FormatSeqpos") and fmindex ("FormatSeqpos").\n",
                         numMMSearchMatches, numFMIMatches);
          }
        }
        /* release the per-trial iterator on every path, then stop on error
         * so that trial counts only the successful matchings */
        freemmsearchiterator(&mmsi);
        if (had_err)
          break;
        if (params.progressInterval
            && !((trial + 1) % params.progressInterval))
          putc('.', stderr);
      }
      if (params.progressInterval)
        putc('\n', stderr);
      fprintf(stderr, "Finished %lu of %lu matchings successfully.\n",
              trial, params.numOfSamples);
    }
  } while (0);
  if (EMIterInitialized)
    destructEMIterator(&EMIter);
  if (saIsLoaded)
    freesuffixarray(&suffixarray);
  if (epi)
    freeEnumpatterniterator(&epi);
  if (bwtSeq)
    deleteBWTSeq(bwtSeq);
  if (verbosity)
    freeverboseinfo(&verbosity);
  if (inputProject)
    gt_str_delete(inputProject);
  return had_err?-1:0;
}
/**
 * Verify a BWT sequence index against the reference suffix array project
 * it was built from.
 *
 * The suffix table and encoded sequence of projectName are loaded and the
 * lengths compared. Depending on checkFlags further checks are run:
 *  - VERIFY_BWTSEQ_SUFVAL: every position carrying locate information is
 *    located and compared with the suffix table entry;
 *  - terminator position comparison (whenever locate information exists
 *    and the suffix array defines its longest suffix position);
 *  - VERIFY_BWTSEQ_LFMAPWALK: the sequence is regenerated back to front via
 *    LF-mapping and compared symbol by symbol (needs a reversibly sorted
 *    index);
 *  - VERIFY_BWTSEQ_CONTEXT: random subsequences are regenerated through the
 *    context retriever and compared with the encoded sequence.
 *
 * @param bwtSeq      index to verify, must not be NULL
 * @param projectName name of the suffix array project, must not be NULL
 * @param checkFlags  bitwise or of VERIFY_BWTSEQ_* flags
 * @param tickPrint   if non-zero, print a '.' to fp every tickPrint
 *                    positions of the suffix value check
 * @param fp          stream receiving the progress ticks
 * @param verbosity   logger handed to gt_mapsuffixarray
 * @param err         error object, must not be NULL and must be unset
 * @return VERIFY_BWTSEQ_NO_ERROR if all requested checks passed, otherwise
 *         the code of the first failing check (err is set accordingly)
 */
enum verifyBWTSeqErrCode
gt_BWTSeqVerifyIntegrity(BWTSeq *bwtSeq, const char *projectName,
                         int checkFlags,
                         GtUword tickPrint, FILE *fp,
                         GtLogger *verbosity, GtError *err)
{
  Suffixarray suffixArray;
  struct extBitsRetrieval extBits;
  bool suffixArrayIsInitialized = false, extBitsAreInitialized = false;
  enum verifyBWTSeqErrCode retval = VERIFY_BWTSEQ_NO_ERROR;
  /* single-pass loop: "break" jumps to the common cleanup below */
  do
  {
    GtUword seqLen;
    gt_assert(bwtSeq && projectName && err);
    gt_error_check(err);

    initExtBitsRetrieval(&extBits);
    extBitsAreInitialized = true;

    if (gt_mapsuffixarray(&suffixArray,
                          SARR_SUFTAB | SARR_ESQTAB, projectName, verbosity,
                          err))
    {
      gt_error_set(err, "Cannot load reference suffix array project with"
                   " demand for suffix table file and encoded sequence"
                   " for project: %s", projectName);
      retval = VERIFY_BWTSEQ_REFLOAD_ERROR;
      break;
    }
    suffixArrayIsInitialized = true;
    /* the index length is compared against the sequence length plus one */
    seqLen = gt_encseq_total_length(suffixArray.encseq) + 1;
    if (BWTSeqLength(bwtSeq) != seqLen)
    {
      gt_error_set(err, "length mismatch for suffix array project %s and "
                   "bwt sequence index", projectName);
      retval = VERIFY_BWTSEQ_LENCOMPARE_ERROR;
      break;
    }

    if (checkFlags & VERIFY_BWTSEQ_SUFVAL
        && BWTSeqHasLocateInformation(bwtSeq))
    {
      GtUword i;
      for (i = 0; i < seqLen && retval == VERIFY_BWTSEQ_NO_ERROR; ++i)
      {
        if (gt_BWTSeqPosHasLocateInfo(bwtSeq, i, &extBits))
        {
          GtUword sfxArrayValue = gt_BWTSeqLocateMatch(bwtSeq, i, &extBits);
          if (sfxArrayValue != ESASUFFIXPTRGET(suffixArray.suftab,i))
          {
            gt_error_set(err, "Failed suffix array value comparison"
                         " at position "GT_WU": "GT_WU" != "GT_WU"",
                         i, sfxArrayValue,
                         ESASUFFIXPTRGET(suffixArray.suftab,i));
            retval = VERIFY_BWTSEQ_SUFVAL_ERROR;
            break;
          }
        }
        if (tickPrint && !((i + 1) % tickPrint))
          putc('.', fp);
      }
      if (tickPrint)
        putc('\n', fp);
      if (retval != VERIFY_BWTSEQ_NO_ERROR)
        break;
    }
    else if (checkFlags & VERIFY_BWTSEQ_SUFVAL)
    {
      gt_error_set(err, "check of suffix array values was requested,"
                   " but index contains no locate information!");
      retval = VERIFY_BWTSEQ_SUFVAL_ERROR;
      break;
    }
    else if (!(checkFlags & VERIFY_BWTSEQ_SUFVAL)
             && BWTSeqHasLocateInformation(bwtSeq))
    {
      fputs("Not checking suftab values.\n", stderr);
    }
    if (BWTSeqHasLocateInformation(bwtSeq))
    {
      GtUword nextLocate = BWTSeqTerminatorPos(bwtSeq);
      if (suffixArray.longest.defined &&
          suffixArray.longest.valueunsignedlong != nextLocate)
      {
        gt_error_set(err, "terminator/0-rotation position mismatch "GT_WU""
                     " vs. "GT_WU"", suffixArray.longest.valueunsignedlong,
                     nextLocate);
        retval = VERIFY_BWTSEQ_TERMPOS_ERROR;
        break;
      }
      if ((checkFlags & VERIFY_BWTSEQ_LFMAPWALK)
          && (bwtSeq->featureToggles & BWTReversiblySorted))
      {
        GtUword i = seqLen;
        /* handle first symbol specially because the encseq
         * will not return the terminator symbol */
        {
          Symbol sym = BWTSeqGetSym(bwtSeq, nextLocate);
          if (sym != UNDEFBWTCHAR)
          {
            gt_error_set(err, "symbol mismatch at position "GT_WU": "
                         "%d vs. reference symbol %d", i - 1, (int)sym,
                         (int)UNDEFBWTCHAR);
            retval = VERIFY_BWTSEQ_LFMAPWALK_ERROR;
            break;
          }
          --i;
          nextLocate = BWTSeqLFMap(bwtSeq, nextLocate, &extBits);
        }
        /* walk the remaining symbols from the end towards the start */
        while (i > 0)
        {
          Symbol symRef = gt_encseq_get_encoded_char(suffixArray.encseq,
                                                     --i,
                                                     suffixArray.readmode);
          Symbol symCmp = BWTSeqGetSym(bwtSeq, nextLocate);
          if (symCmp != symRef)
          {
            gt_error_set(err, "symbol mismatch at position "GT_WU": "
                         "%d vs. reference symbol %d", i, (int)symCmp,
                         (int)symRef);
            retval = VERIFY_BWTSEQ_LFMAPWALK_ERROR;
            break;
          }
          nextLocate = BWTSeqLFMap(bwtSeq, nextLocate, &extBits);
        }
        if (retval != VERIFY_BWTSEQ_NO_ERROR)
          break;
      }
      else if ((checkFlags & VERIFY_BWTSEQ_LFMAPWALK)
               && !(bwtSeq->featureToggles & BWTReversiblySorted))
      {
        gt_error_set(err, "requested complete backwards regeneration in index"
                     " without regeneration capability");
        retval = VERIFY_BWTSEQ_LFMAPWALK_IMP_ERROR;
        break;
      }
    }
    if (checkFlags & VERIFY_BWTSEQ_CONTEXT)
    {
      BWTSeqContextRetriever *bwtSeqCR =
        gt_BWTSeqCRLoad(bwtSeq, projectName, CTX_MAP_ILOG_AUTOSIZE);
      if (!bwtSeqCR)
      {
        gt_error_set(err, "cannot load BWT sequence context access table"
                     " for project %s", projectName);
        retval = VERIFY_BWTSEQ_CONTEXT_LOADFAIL;
        break;
      }
      fputs("Checking context regeneration.\n", stderr);
      {
        GtUword i, start, subSeqLen,
          /* clamp to seqLen: for sequences shorter than the configured
           * context length, seqLen - subSeqLen + 1 below would otherwise
           * wrap around and yield start positions outside the sequence */
          maxSubSeqLen = MIN(MIN(MAX(MIN_CONTEXT_LEN,
                                     seqLen/CONTEXT_FRACTION),
                                 MAX_CONTEXT_LEN),
                             seqLen),
          numTries = MIN(MAX_NUM_CONTEXT_CHECKS,
                         MAX(2, seqLen/CONTEXT_INTERVAL));
        Symbol *contextBuf = gt_malloc(sizeof (Symbol) * MAX_CONTEXT_LEN);
        GtEncseqReader *esr =
          gt_encseq_create_reader_with_readmode(suffixArray.encseq,
                                                suffixArray.readmode,
                                                0);
        for (i = 0; i < numTries && retval == VERIFY_BWTSEQ_NO_ERROR; ++i)
        {
          GtUword j, end, inSubSeqLen;
          subSeqLen = random()%maxSubSeqLen + 1;
          start = random()%(seqLen - subSeqLen + 1);
          end = start + subSeqLen;
          /* a window reaching seqLen ends in the terminator, which is
           * compared against UNDEFBWTCHAR instead of the encseq */
          inSubSeqLen = subSeqLen - ((end==seqLen)?1:0);
          gt_BWTSeqCRAccessSubseq(bwtSeqCR, start, subSeqLen, contextBuf);
          gt_encseq_reader_reinit_with_readmode(esr, suffixArray.encseq,
                                                suffixArray.readmode, start);
          for (j = 0; j < inSubSeqLen; ++j)
          {
            Symbol symRef = gt_encseq_reader_next_encoded_char(esr);
            Symbol symCmp = contextBuf[j];
            if (symCmp != symRef)
            {
              gt_error_set(err, "symbol mismatch at position "GT_WU": "
                           "%d vs. reference symbol %d", start + j,
                           (int)symCmp, (int)symRef);
              retval = VERIFY_BWTSEQ_CONTEXT_SYMFAIL;
              break;
            }
          }
          /* only check the terminator if the window matched so far, so an
           * earlier mismatch report is not overwritten */
          while (retval == VERIFY_BWTSEQ_NO_ERROR && j < subSeqLen)
          {
            Symbol symRef = UNDEFBWTCHAR;
            Symbol symCmp = contextBuf[j];
            if (symCmp != symRef)
            {
              gt_error_set(err, "symbol mismatch at position "GT_WU": "
                           "%d vs. reference symbol %d", start + j,
                           (int)symCmp, (int)symRef);
              retval = VERIFY_BWTSEQ_CONTEXT_SYMFAIL;
              break;
            }
            ++j;
          }
        }
        if (retval == VERIFY_BWTSEQ_NO_ERROR)
          fputs("Context regeneration completed successfully.\n", stderr);
        gt_encseq_reader_delete(esr);
        gt_free(contextBuf);
      }
      gt_deleteBWTSeqCR(bwtSeqCR);
    }
  } while (0);
  if (suffixArrayIsInitialized)
    gt_freesuffixarray(&suffixArray);
  if (extBitsAreInitialized)
    destructExtBitsRetrieval(&extBits);
  return retval;
}