extern EISeq * createEncIdxSeq(const GtStr *projectName, const struct seqBaseParam *params, size_t numExtHeaders, const uint16_t *headerIDs, const uint32_t *extHeaderSizes, headerWriteFunc *extHeaderCallbacks, void **headerCBData, bitInsertFunc biFunc, BitOffset cwExtBitsPerPos, varExtBitsEstimator biVarBits, void *cbState, Verboseinfo *verbosity, GtError *err) { Suffixarray suffixArray; struct encIdxSeq *newSeqIdx; Seqpos length; gt_assert(projectName); /* map and interpret index project file */ if (streamsuffixarray(&suffixArray, SARR_SUFTAB | SARR_BWTTAB, projectName, verbosity, err)) return NULL; length = getencseqtotallength(suffixArray.encseq) + 1; newSeqIdx = createEncIdxSeqFromSA(&suffixArray, length, projectName, params, numExtHeaders, headerIDs, extHeaderSizes, extHeaderCallbacks, headerCBData, biFunc, cwExtBitsPerPos, biVarBits, cbState, err); freesuffixarray(&suffixArray); return newSeqIdx; }
/*
 * Exercises multicharactercompare_withtest() on encseq in the direction
 * given by readmode.
 *
 * First all four combinations of the first and the last sequence position
 * are compared, then multicharcmptrials pairs of pseudo-random positions.
 * The random generator is seeded with a fixed value so runs are
 * reproducible.  Results of the comparisons are discarded; any checking
 * is left to multicharactercompare_withtest() itself.
 */
static void testmulticharactercompare(const Encodedsequence *encseq,
                                      Readmode readmode,
                                      unsigned long multicharcmptrials)
{
  Encodedsequencescanstate *state1, *state2;
  Seqpos corners[2], first, second, totallength;
  unsigned long round;
  unsigned int i, j;
  bool fwd = true,
       complement = false;

  if (ISDIRREVERSE(readmode))
  {
    fwd = false;
  }
  if (ISDIRCOMPLEMENT(readmode))
  {
    complement = true;
  }
  state1 = newEncodedsequencescanstate();
  state2 = newEncodedsequencescanstate();
  totallength = getencseqtotallength(encseq);
  srand48(42349421);

  /* boundary cases: (0,0), (0,last), (last,0), (last,last) */
  corners[0] = 0;
  corners[1] = totallength-1;
  for (i = 0; i < 2U; i++)
  {
    for (j = 0; j < 2U; j++)
    {
      (void) multicharactercompare_withtest(encseq,fwd,complement,
                                            state1,corners[i],
                                            state2,corners[j]);
    }
  }

  /* random position pairs */
  round = 0;
  while (round < multicharcmptrials)
  {
    first = (Seqpos) (drand48() * (double) totallength);
    second = (Seqpos) (drand48() * (double) totallength);
    (void) multicharactercompare_withtest(encseq,fwd,complement,
                                          state1,first,state2,second);
    round++;
  }
  freeEncodedsequencescanstate(&state1);
  freeEncodedsequencescanstate(&state2);
}
/*
 * Verifies that sequential reading of encseq, started at startpos with
 * the scan state esr, yields the same characters as random access for
 * every position from startpos up to the end of the sequence.
 *
 * On the first mismatch a diagnostic is written to stderr and the process
 * exits with GT_EXIT_PROGRAMMING_ERROR.
 */
static void runscanatpostrial(const Encodedsequence *encseq,
                              Encodedsequencescanstate *esr,
                              Readmode readmode,Seqpos startpos)
{
  const Seqpos endpos = getencseqtotallength(encseq);
  Seqpos cur = startpos;

  initEncodedsequencescanstate(esr,encseq,readmode,startpos);
  while (cur < endpos)
  {
    /* random access serves as the reference value */
    const GtUchar viarandom = getencodedchar(encseq,cur,readmode);
    const GtUchar viastream = sequentialgetencodedchar(encseq,esr,cur,
                                                       readmode);

    if (viarandom != viastream)
    {
      fprintf(stderr,"startpos = " FormatSeqpos
                     " access=%s, mode=%s: position=" FormatSeqpos
                     ": random access (correct) = %u != %u = "
                     " sequential read (wrong)\n",
                     PRINTSeqposcast(startpos),
                     encseqaccessname(encseq),
                     showreadmode(readmode),
                     PRINTSeqposcast(cur),
                     (unsigned int) viarandom,
                     (unsigned int) viastream);
      exit(GT_EXIT_PROGRAMMING_ERROR);
    }
    cur++;
  }
}
/*
 * Computes, per alphabet character, how often that character is the
 * neighbour of a special range that is inspected for the given readmode.
 *
 * For non-reversed read modes the character at range.leftpos-1 (read in
 * readmode) is counted for every special range not starting at position
 * 0.  For reversed read modes the character at range.rightpos is counted
 * instead, read in Forwardmode (for Reversemode) or Complementmode
 * (otherwise), provided range.rightpos lies inside the sequence.
 * If the sequence has no special suffix, the character at the last
 * position (read in readmode) is counted as well.
 *
 * Returns an array of numofchars counters allocated with gt_malloc();
 * presumably the caller is responsible for releasing it.
 */
static Seqpos *leftcontextofspecialchardist(unsigned int numofchars,
                                            const Encodedsequence *encseq,
                                            Readmode readmode)
{
  const Seqpos totallength = getencseqtotallength(encseq);
  Seqpos *counts;
  GtUchar neighbour;
  unsigned int charidx = 0;

  counts = gt_malloc(sizeof (*counts) * numofchars);
  while (charidx < numofchars)
  {
    counts[charidx++] = 0;
  }
  if (hasspecialranges(encseq))
  {
    const bool reversed = ISDIRREVERSE(readmode) ? true : false;
    Specialrangeiterator *rangeiter = newspecialrangeiterator(encseq,true);
    Sequencerange range;

    while (nextspecialrangeiterator(&range,rangeiter))
    {
      if (reversed)
      {
        if (range.rightpos < totallength)
        {
          /* look right of the range, with the direction flipped back */
          const Readmode flipped = (readmode == Reversemode)
                                     ? Forwardmode
                                     : Complementmode;

          neighbour = getencodedchar(encseq,range.rightpos,flipped);
          if (ISNOTSPECIAL(neighbour))
          {
            counts[neighbour]++;
          }
        }
      } else
      {
        gt_assert(range.leftpos < totallength);
        if (range.leftpos > 0)
        {
          neighbour = getencodedchar(encseq,range.leftpos-1,readmode);
          if (ISNOTSPECIAL(neighbour))
          {
            counts[neighbour]++;
          }
        }
      }
    }
    freespecialrangeiterator(&rangeiter);
  }
  if (getencseqlengthofspecialsuffix(encseq) == 0)
  {
    /* no special suffix: the last character is counted too */
    neighbour = getencodedchar(encseq,totallength-1,readmode);
    gt_assert(ISNOTSPECIAL(neighbour));
    counts[neighbour]++;
  }
  return counts;
}
/*
 * Test driver for the merger trie: builds a trie over the encoded
 * sequence of index indexname and then either checks it (onlyins, only
 * when compiled with WITHTRIEIDENT) or successively deletes the smallest
 * elements again.
 *
 * Returns 0 if the index could be opened and -1 otherwise.  The suffix
 * array is released in both cases.
 */
int test_trieins(bool onlyins,const GtStr *indexname,GtError *err)
{
  Suffixarray suffixarray;
  Mergertrierep trierep;
  const GtUchar *characters;
  Seqpos totallength;

  gt_error_check(err);
  if (streamsuffixarray(&suffixarray,
                        SARR_ESQTAB,
                        indexname,
                        NULL,
                        err) != 0)
  {
    freesuffixarray(&suffixarray);
    return -1;
  }
  totallength = getencseqtotallength(suffixarray.encseq);

  /* a single read-info entry describing the one indexed sequence */
  ALLOCASSIGNSPACE(trierep.encseqreadinfo,NULL,Encseqreadinfo,1);
  trierep.encseqreadinfo[0].encseqptr = suffixarray.encseq;
  trierep.encseqreadinfo[0].readmode = suffixarray.readmode;
  characters = getencseqAlphabetcharacters(suffixarray.encseq);
  mergertrie_initnodetable(&trierep,totallength,1U);
  maketrie(&trierep,characters,totallength);
  if (!onlyins)
  {
#ifdef WITHTRIEIDENT
#ifdef WITHTRIESHOW
    showallnoderelations(trierep.root);
#endif
#endif
    successivelydeletesmallest(&trierep,totallength,characters,err);
  } else
  {
#ifdef WITHTRIEIDENT
#ifdef WITHTRIESHOW
    showtrie(&trierep,characters);
#endif
    checktrie(&trierep,totallength+1,totallength,err);
#endif
  }
  mergertrie_delete(&trierep);
  freesuffixarray(&suffixarray);
  return 0;
}
/*
 * Loads a previously stored encoded index sequence for project
 * projectName.
 *
 * The project is opened via streamsuffixarray() without requesting any
 * table; loading is delegated to loadEncIdxSeqForSA(), which is given
 * the total length of the encoded sequence plus one.  The suffix array
 * is released before returning.
 *
 * Returns the loaded index (whatever loadEncIdxSeqForSA() returned) or
 * NULL if the project could not be opened.
 */
extern EISeq *
loadEncIdxSeq(const GtStr *projectName,
              enum seqBaseEncoding encType, int features,
              Verboseinfo *verbosity, GtError *err)
{
  Suffixarray sa;
  struct encIdxSeq *loaded;
  Seqpos seqLen;

  if (streamsuffixarray(&sa, 0, projectName, verbosity, err))
  {
    return NULL;
  }
  seqLen = getencseqtotallength(sa.encseq) + 1;
  loaded = loadEncIdxSeqForSA(&sa, seqLen, projectName,
                              encType, features, err);
  freesuffixarray(&sa);
  return loaded;
}
/*
 * Runs runscanatpostrial() on encseq from the first position, from the
 * last position, and from scantrials pseudo-random start positions.
 * The random generator is seeded with a fixed value; each random trial
 * is announced on stdout.
 */
static void testscanatpos(const Encodedsequence *encseq,
                          Readmode readmode,
                          unsigned long scantrials)
{
  const Seqpos totallength = getencseqtotallength(encseq);
  Encodedsequencescanstate *scanstate;
  Seqpos from;
  unsigned long round = 0;

  srand48(42349421);
  scanstate = newEncodedsequencescanstate();
  /* the two boundary start positions first */
  runscanatpostrial(encseq,scanstate,readmode,0);
  runscanatpostrial(encseq,scanstate,readmode,totallength-1);
  while (round < scantrials)
  {
    from = (Seqpos) (drand48() * (double) totallength);
    printf("trial %lu at " FormatSeqpos "\n",round,PRINTSeqposcast(from));
    runscanatpostrial(encseq,scanstate,readmode,from);
    round++;
  }
  freeEncodedsequencescanstate(&scanstate);
}
/*
 * Narrows the suffix array interval [left,right] character by character
 * along the query qstart..qend (exclusive), starting at depth offset.
 *
 * As soon as the interval no longer satisfies left < right, the depth
 * reached is returned.  If before that the query is exhausted, a special
 * character is met, or lcpintervalfindcharchildintv() finds no child
 * interval for the current character, 0 is returned.
 *
 * genericindex must point to a Suffixarray; witnessposition is unused.
 */
unsigned long suffixarrayuniqueforward (const void *genericindex,
                                        unsigned long offset,
                                        Seqpos left,
                                        Seqpos right,
                                        GT_UNUSED Seqpos *witnessposition,
                                        const GtUchar *qstart,
                                        const GtUchar *qend)
{
  const Suffixarray *sarr = (const Suffixarray *) genericindex;
  const GtUchar *query = qstart;
  Simplelcpinterval itv;
  Seqpos totallength;

  itv.left = left;
  itv.right = right;
  totallength = getencseqtotallength(sarr->encseq);
  while (itv.left < itv.right)
  {
    if (query >= qend || ISSPECIAL(*query) ||
        !lcpintervalfindcharchildintv(sarr->encseq,
                                      sarr->readmode,
                                      totallength,
                                      sarr->suftab,
                                      &itv,
                                      *query,
                                      (Seqpos) offset,
                                      itv.left,
                                      itv.right))
    {
      return 0;
    }
    query++;
    offset++;
  }
  return offset;
}
/*
 * Extends a match in the suffix array interval [left,right] along the
 * query qstart..qend (exclusive), starting at depth offset, for as long
 * as lcpintervalfindcharchildintv() finds a child interval for the next
 * query character.
 *
 * Extension stops when the query is exhausted, a special character is
 * met, or no child interval exists.  At that point, if witnessposition
 * is not NULL, it receives the suffix table entry at the left boundary
 * of the current interval.  Returns the depth reached.
 *
 * genericindex must point to a Suffixarray.
 */
unsigned long suffixarraymstats (const void *genericindex,
                                 unsigned long offset,
                                 Seqpos left,
                                 Seqpos right,
                                 Seqpos *witnessposition,
                                 const GtUchar *qstart,
                                 const GtUchar *qend)
{
  const Suffixarray *sarr = (const Suffixarray *) genericindex;
  const GtUchar *query = qstart;
  Simplelcpinterval itv;
  Seqpos totallength;

  itv.left = left;
  itv.right = right;
  totallength = getencseqtotallength(sarr->encseq);
  for (;;)
  {
    gt_assert(itv.left <= itv.right);
    if (query >= qend || ISSPECIAL(*query) ||
        !lcpintervalfindcharchildintv(sarr->encseq,
                                      sarr->readmode,
                                      totallength,
                                      sarr->suftab,
                                      &itv,
                                      *query,
                                      (Seqpos) offset,
                                      itv.left,
                                      itv.right))
    {
      break;
    }
    query++;
    offset++;
  }
  if (witnessposition != NULL)
  {
    *witnessposition = sarr->suftab[itv.left];
  }
  return offset;
}
/*
 * Scans encseq forward from startpos, feeding each character into the
 * distance computation set up by initeqsvector()/COMPUTENEWDIST, until
 * the distance value is at most maxdistance or the last sequence
 * position has been processed.
 *
 * Returns a defined value holding the number of characters consumed
 * (pos - startpos + 1).  If nowildcards is set and a wildcard is read,
 * an undefined value (with value 0) is returned instead.
 *
 * NOTE(review): DECLARELOCALVARS and COMPUTENEWDIST are macros defined
 * elsewhere; they presumably declare and update distval and read
 * eqsvector/ulen by name, so those identifiers must not be renamed.
 */
Definedunsignedlong forwardprefixmatch(const Encodedsequence *encseq,
                                       unsigned int alphasize,
                                       Seqpos startpos,
                                       bool nowildcards,
                                       unsigned long *eqsvector,
                                       const GtUchar *useq,
                                       unsigned long ulen,
                                       unsigned long maxdistance)
{
  DECLARELOCALVARS;
  Seqpos pos = startpos,
         finalpos = getencseqtotallength(encseq) - 1;
  GtUchar cc;
  Definedunsignedlong result;

  initeqsvector(eqsvector,(unsigned long) alphasize,useq,ulen);
  gt_assert(maxdistance > 0);
  while (true)
  {
    gt_assert(pos - startpos <= (Seqpos) (ulen + maxdistance));
    cc = getencodedchar(encseq,pos,Forwardmode);
    if (nowildcards && cc == (GtUchar) WILDCARD)
    {
      /* wildcards are not allowed inside the match */
      result.defined = false;
      result.valueunsignedlong = 0;
      return result;
    }
    COMPUTENEWDIST(cc);
    if (distval <= maxdistance || pos == finalpos)
    {
      break;
    }
    pos++;
  }
  result.valueunsignedlong = (unsigned long) (pos - startpos + 1);
  result.defined = true;
  return result;
}
/*
 * Scans the complete encoded sequence and cross-checks three ways of
 * reading it: random access, sequential access via a scan state and, if
 * filenametab is given and readmode is Forwardmode, a re-read of the
 * original input files through a sequence buffer.
 *
 * filenametab  original input files or NULL to skip the file comparison
 * encseq       encoded sequence under test
 * readmode     direction in which encseq is read
 * err          receives a message describing the first discrepancy
 *
 * Returns 0 if all characters agree and exactly totallength characters
 * were seen, -1 otherwise (with err set).
 */
static int testfullscan(const GtStrArray *filenametab,
                        const Encodedsequence *encseq,
                        Readmode readmode,
                        GtError *err)
{
  Seqpos pos = 0, totallength;
  GtUchar ccscan = 0, ccra, ccsr;
  GtSequenceBuffer *fb = NULL;
  int retval;
  bool haserr = false;
  /* must start out as NULL: it is released at the end even if creating
     the sequence buffer failed and no scan state was ever allocated */
  Encodedsequencescanstate *esr = NULL;
  unsigned long long fullscanpbar = 0;

  gt_error_check(err);
  totallength = getencseqtotallength(encseq);
  gt_progressbar_start(&fullscanpbar,(unsigned long long) totallength);
  if (filenametab != NULL)
  {
    fb = gt_sequence_buffer_new_guess_type((GtStrArray*) filenametab, err);
    if (!fb)
    {
      haserr = true;
    }
    if (!haserr)
    {
      gt_sequence_buffer_set_symbolmap(fb,
                                       getencseqAlphabetsymbolmap(encseq));
    }
  }
  if (!haserr)
  {
    esr = newEncodedsequencescanstate();
    initEncodedsequencescanstate(esr,encseq,readmode,0);
    for (pos=0; /* Nothing */; pos++)
    {
      if (filenametab != NULL && readmode == Forwardmode)
      {
        /* the input files determine where the scan ends */
        retval = gt_sequence_buffer_next(fb,&ccscan,err);
        if (retval < 0)
        {
          haserr = true;
          break;
        }
        if (retval == 0)
        {
          break;
        }
      } else
      {
        if (pos >= totallength)
        {
          break;
        }
      }
      ccra = getencodedchar(encseq,pos,readmode); /* Random access */
      if (filenametab != NULL && readmode == Forwardmode)
      {
        if (ccscan != ccra)
        {
          gt_error_set(err,"access=%s, position=" FormatSeqpos
                            ": scan (readnextchar) = %u != "
                            "%u = random access",
                            encseqaccessname(encseq),
                            PRINTSeqposcast(pos),
                            (unsigned int) ccscan,
                            (unsigned int) ccra);
          haserr = true;
          break;
        }
      }
      ccsr = sequentialgetencodedchar(encseq,esr,pos,readmode);
      if (ccra != ccsr)
      {
        gt_error_set(err,"access=%s, mode=%s: position=" FormatSeqpos
                          ": random access = %u != %u = sequential read",
                          encseqaccessname(encseq),
                          showreadmode(readmode),
                          PRINTSeqposcast(pos),
                          (unsigned int) ccra,
                          (unsigned int) ccsr);
        haserr = true;
        break;
      }
      fullscanpbar++;
    }
  }
  /* pair every gt_progressbar_start() with a stop, also when the
     sequence buffer could not be created */
  gt_progressbar_stop();
  if (!haserr && pos != totallength)
  {
    gt_error_set(err,"sequence length must be " FormatSeqpos " but is "
                     FormatSeqpos,
                     PRINTSeqposcast(totallength),
                     PRINTSeqposcast(pos));
    haserr = true;
  }
  if (esr != NULL)
  {
    freeEncodedsequencescanstate(&esr);
  }
  gt_sequence_buffer_delete(fb);
  return haserr ? -1 : 0;
}
/*
 * Handles all sub-buckets (source,second) of one source character:
 * sub-buckets that are not yet sorted (and are not the diagonal one) are
 * expected to be flagged hardworktodo, are reported, marked as sorted and
 * their width is added up; all others must not be flagged hardworktodo.
 * Returns the accumulated width.
 */
static Seqpos copysort_accounthardwork(const GtBucketspec2 *bucketspec2,
                                       unsigned int source,
                                       Verboseinfo *verboseinfo)
{
  Seqpos work = 0;
  unsigned int second = 0;

  while (second < bucketspec2->numofchars)
  {
    if (bucketspec2->subbuckettab[source][second].sorted ||
        source == second)
    {
      gt_assert(!bucketspec2->subbuckettab[source][second].hardworktodo);
    } else
    {
      gt_assert(bucketspec2->subbuckettab[source][second].hardworktodo);
      showverbose(verboseinfo,"hard work for %u %u",source,second);
      work += getendidx(bucketspec2,source,second) -
              getstartidx(bucketspec2,source,second);
      bucketspec2->subbuckettab[source][second].sorted = true;
    }
    second++;
  }
  return work;
}

/*
 * Processes the super-buckets of bucketspec2 in the order given by
 * bucketspec2->order.  For each source character the remaining unsorted
 * sub-buckets are accounted as "hard work"; then, where the respective
 * index ranges are non-empty, forwardderive() and backwardderive() are
 * invoked with per-character target pointers into suftab.  Afterwards
 * all sub-buckets with second character source and the super-bucket
 * itself are marked as sorted.  Finally the total hard work is reported
 * in absolute terms and relative to the total sequence length.
 */
void gt_copysortsuffixes(const GtBucketspec2 *bucketspec2,
                         Seqpos *suftab,
                         Verboseinfo *verboseinfo)
{
  Seqpos totalwork = 0, **cursor;
  unsigned int ch, rank, source;
  double ratio;

#ifdef WITHSUFFIXES
  /* NOTE(review): readmode and encseq are not declared in this function;
     this debugging section presumably does not compile as is. */
  {
    const Seqpos *ptr;
    for (ptr = suftab; ptr < suftab + bucketspec2->partwidth; ptr++)
    {
      showsequenceatstartpos(stdout,
                             ISDIRREVERSE(readmode) ? false : true,
                             ISDIRCOMPLEMENT(readmode) ? true : false,
                             encseq,
                             *ptr);
    }
  }
#endif
  cursor = gt_malloc(sizeof (*cursor) * bucketspec2->numofchars);
  rank = 0;
  while (rank < bucketspec2->numofchars)
  {
    source = bucketspec2->order[rank];
    totalwork += copysort_accounthardwork(bucketspec2,source,verboseinfo);

    /* left part of the super-bucket: derive in forward direction */
    if (getstartidx(bucketspec2,source,0) <
        getstartidx(bucketspec2,source,source))
    {
      for (ch = 0; ch < bucketspec2->numofchars; ch++)
      {
        cursor[ch] = suftab + getstartidx(bucketspec2,ch,source);
      }
      forwardderive(bucketspec2,
                    cursor,
                    source,
                    suftab + getstartidx(bucketspec2,source,0));
    }

    /* right part of the super-bucket: derive in backward direction */
    if (getendidx(bucketspec2,source,source) <
        getendidx(bucketspec2,source,bucketspec2->numofchars))
    {
      for (ch = 0; ch < bucketspec2->numofchars; ch++)
      {
        cursor[ch] = suftab + getendidx(bucketspec2,ch,source) - 1;
      }
      backwardderive(bucketspec2,
                     cursor,
                     source,
                     suftab +
                     getendidx(bucketspec2,source,bucketspec2->numofchars)
                     - 1);
    }

    for (ch = 0; ch < bucketspec2->numofchars; ch++)
    {
      bucketspec2->subbuckettab[ch][source].sorted = true;
    }
    bucketspec2->superbuckettab[source].sorted = true;
    rank++;
  }
  gt_free(cursor);
  ratio = (double) totalwork/getencseqtotallength(bucketspec2->encseq);
  showverbose(verboseinfo,"hardwork = " FormatSeqpos " (%.2f)",
              PRINTSeqposcast(totalwork),
              ratio);
}
extern EISeq * createBWTSeqGeneric(const struct bwtParam *params, indexCreateFunc createIndex, SASeqSrc *src, const enum rangeSortMode rangeSort[], const SpecialsRankLookup *sprTable, GtError *err) { struct encIdxSeq *baseSeqIdx = NULL; struct addLocateInfoState varState; bool varStateIsInitialized = false; unsigned locateInterval; BWTSeqContextRetrieverFactory *buildContextMap = NULL; gt_assert(src && params && err); gt_error_check(err); locateInterval = params->locateInterval; do { struct locateHeaderWriteInfo locHeaderData = { src, locateInterval, params->featureToggles }; struct sortModeHeader sortModeHeader; void *p[] = { &locHeaderData , &sortModeHeader }; uint16_t headerIDs[] = { LOCATE_INFO_IN_INDEX_HEADERID, RANK_SORT_HEADERID }; uint32_t headerSizes[] = { LOCATE_HEADER_SIZE, 0 }; headerWriteFunc headerFuncs[] = { writeLocateInfoHeader, writeRankSortHeader }; size_t numHeaders = 0; unsigned bitsPerOrigRank = 0; Seqpos totalLen = SASSGetLength(src); const MRAEnc *alphabet = SASSGetMRAEnc(src); MRAEnc *baseAlphabet = SASSNewMRAEnc(src); /* FIXME: this has to work also when locateInterval == 0 and * sprTable != NULL */ if (params->ctxMapILog != CTX_MAP_ILOG_NOMAP) buildContextMap = newBWTSeqContextRetrieverFactory(totalLen, params->ctxMapILog); if (locateInterval) { ++numHeaders; if (sortModeHeaderNeeded(alphabet, rangeSort, sprTable)) { Seqpos #ifndef NDEBUG origSeqLen = getencseqtotallength(SPRTGetOrigEncseq(sprTable)), #endif maxRank; gt_assert(origSeqLen == totalLen - 1); maxRank = specialsRank(sprTable, totalLen - 1); bitsPerOrigRank = sortModeHeader.bitsPerOrigRank = requiredSeqposBits(maxRank); sortModeHeader.alphabet = alphabet; sortModeHeader.rangeSort = rangeSort; headerSizes[1] = computeSortModeHeaderSize(alphabet); ++numHeaders; } { SeqDataReader readSfxIdx = SASSCreateReader(src, SFX_REQUEST_SUFTAB); if (SDRIsValid(readSfxIdx)) { initAddLocateInfoState( &varState, SASSGetOrigSeqAccessor(src), readSfxIdx, alphabet, SASSGetSeqStats(src), rangeSort, 
totalLen, params, bitsPerOrigRank?sprTable:NULL, bitsPerOrigRank, buildContextMap); varStateIsInitialized = true; } else { gt_error_set(err, "error: locate sampling requested but not available" " for project %s\n", gt_str_get(params->projectName)); } } } if (!(baseSeqIdx = createIndex(totalLen, params->projectName, baseAlphabet, SASSGetSeqStats(src), SASSCreateReader(src, SFX_REQUEST_BWTTAB), ¶ms->seqParams, numHeaders, headerIDs, headerSizes, headerFuncs, p, locateInterval?addLocateInfo:NULL, /* one bit per position if using bitmap */ (params->featureToggles & BWTLocateBitmap)?1:0, locateInterval?locBitsUpperBounds:NULL, &varState, err))) break; if (buildContextMap) { if (!BWTSCRFFinished(buildContextMap)) { fputs("error: context table construction incomplete!\n", stderr); } else { BWTSeqContextRetriever *ctxRetrieve = BWTSCRFGet(buildContextMap, NULL, params->projectName); deleteBWTSeqCR(ctxRetrieve); } } } while (0); if (buildContextMap) deleteBWTSeqContextRetrieverFactory(buildContextMap); if (varStateIsInitialized) destructAddLocateInfoState(&varState); return baseSeqIdx; }
/*
 * Fills *suffixarray with the parts of index indexname selected by the
 * bit set demand.
 *
 * map          true: tables are mapped via genericmaptable()/mapbcktab();
 *              false: tables are opened as buffered file streams via
 *              INITBufferedfile
 * suffixarray  structure to initialise; released again via
 *              freesuffixarray() if anything fails
 * demand       combination of SARR_* flags; the ESQ/DES/SDS/SSP flags
 *              are forwarded to mapencodedsequence(), the SUF/LCP/BWT/BCK
 *              flags select the tables handled below
 * Returns 0 on success and -1 on error.
 *
 * Steps, each skipped once an error has occurred:
 *  - the encoded sequence is mapped and its total length determined;
 *  - the project file keys are read with scanprjfileuintkeys();
 *  - SARR_SUFTAB: suffix table with totallength+1 entries; requires
 *    suffixarray->longest to be defined;
 *  - SARR_LCPTAB: lcp table with totallength+1 entries; in stream mode
 *    the stream is positioned at offset sizeof (GtUchar), i.e. behind the
 *    first entry.  Requires numoflargelcpvalues to be defined; if that
 *    count is positive the table of large lcp values is loaded as well;
 *  - SARR_BWTTAB: BWT table with totallength+1 entries;
 *  - SARR_BCKTAB: bucket table, which can only be mapped - requesting it
 *    in stream mode is an error.
 *
 * NOTE(review): mapencodedsequence() is always called with true as first
 * argument, independent of map - confirm that this is intended.
 * NOTE(review): INITBufferedfile is a macro defined elsewhere; it
 * presumably sets haserr on failure - confirm against its definition.
 */
static int inputsuffixarray(bool map, Suffixarray *suffixarray, unsigned int demand, const GtStr *indexname, Verboseinfo *verboseinfo, GtError *err) { bool haserr = false; Seqpos totallength = 0; gt_error_check(err); initsuffixarray(suffixarray); suffixarray->encseq = mapencodedsequence(true, indexname, (demand & SARR_ESQTAB) ? true : false, (demand & SARR_DESTAB) ? true : false, (demand & SARR_SDSTAB) ? true : false, (demand & SARR_SSPTAB) ? true : false, verboseinfo, err); if (suffixarray->encseq == NULL) { haserr = true; } else { totallength = getencseqtotallength(suffixarray->encseq); } if (!haserr) { haserr = scanprjfileuintkeys(suffixarray,indexname,verboseinfo,err); } if (!haserr && (demand & SARR_SUFTAB)) { if (map) { suffixarray->suftab = genericmaptable(indexname, SUFTABSUFFIX, (unsigned long) (totallength+1), sizeof (Seqpos), err); if (suffixarray->suftab == NULL) { haserr = true; } } else { INITBufferedfile(indexname,&suffixarray->suftabstream,Seqpos, SUFTABSUFFIX); } if (!haserr && !suffixarray->longest.defined) { gt_error_set(err,"longest not defined"); haserr = true; } } if (!haserr && (demand & SARR_LCPTAB)) { if (map) { suffixarray->lcptab = genericmaptable(indexname, LCPTABSUFFIX, (unsigned long) (totallength+1), sizeof (GtUchar), err); if (suffixarray->lcptab == NULL) { haserr = true; } } else { INITBufferedfile(indexname,&suffixarray->lcptabstream,GtUchar, LCPTABSUFFIX); if (!haserr && fseek(suffixarray->lcptabstream.fp,(long) sizeof (GtUchar),SEEK_SET)) { gt_error_set(err,"fseek(esastream) failed: %s",strerror(errno)); haserr = true; } } if (!haserr && !suffixarray->numoflargelcpvalues.defined) { gt_error_set(err,"numoflargelcpvalues not defined"); haserr = true; } if (!haserr && suffixarray->numoflargelcpvalues.valueseqpos > 0) { if (map) { suffixarray->llvtab = genericmaptable(indexname, LARGELCPTABSUFFIX, (unsigned long) suffixarray->numoflargelcpvalues. 
valueseqpos, sizeof (Largelcpvalue), err); if (suffixarray->llvtab == NULL) { haserr = true; } } else { INITBufferedfile(indexname,&suffixarray->llvtabstream,Largelcpvalue, LARGELCPTABSUFFIX); } } } if (!haserr && (demand & SARR_BWTTAB)) { if (map) { suffixarray->bwttab = genericmaptable(indexname, BWTTABSUFFIX, (unsigned long) (totallength+1), sizeof (GtUchar), err); if (suffixarray->bwttab == NULL) { haserr = true; } } else { INITBufferedfile(indexname,&suffixarray->bwttabstream,GtUchar, BWTTABSUFFIX); } } if (!haserr && (demand & SARR_BCKTAB)) { if (map) { suffixarray->bcktab = mapbcktab(indexname, getencseqAlphabetnumofchars(suffixarray-> encseq), suffixarray->prefixlength, err); if (suffixarray->bcktab == NULL) { haserr = true; } } else { gt_error_set(err,"cannot stream bcktab"); haserr = true; } } if (haserr) { freesuffixarray(suffixarray); } return haserr ? -1 : 0; }
/*
 * Tool entry point: cross-checks pattern searches on a packed (BWT)
 * index against searches on the plain suffix array of the same project.
 *
 * After option parsing the packed index is loaded and its integrity is
 * verified.  Then the suffix array (suffix table and encoded sequence)
 * is mapped and params.numOfSamples patterns enumerated from the encoded
 * sequence are searched in both indices:
 *  - if the packed index carries locate information, the match positions
 *    must agree one by one and neither side may deliver extra matches;
 *  - otherwise only the number of matches is compared.
 * Negative pattern length options are replaced by defaults derived from
 * recommendedprefixlength().
 *
 * Every trial releases its mmsearch iterator before the loop is left, and
 * a failed trial is neither counted as finished nor reported with a
 * progress dot.
 *
 * Returns 0 on success (including a requested early exit of the option
 * parser) and -1 on error, in which case err describes the problem.
 */
extern int
gt_packedindex_chk_search(int argc, const char *argv[], GtError *err)
{
  struct chkSearchOptions params;
  Suffixarray suffixarray;
  Enumpatterniterator *epi = NULL;
  bool saIsLoaded = false;
  BWTSeq *bwtSeq = NULL;
  GtStr *inputProject = NULL;
  int parsedArgs;
  bool had_err = false;
  BWTSeqExactMatchesIterator EMIter;
  bool EMIterInitialized = false;
  Verboseinfo *verbosity = NULL;

  inputProject = gt_str_new();
  do
  {
    gt_error_check(err);
    {
      bool exitNow = false;
      switch (parseChkBWTOptions(&parsedArgs, argc, argv, &params,
                                 inputProject, err))
      {
        case OPTIONPARSER_OK:
          break;
        case OPTIONPARSER_ERROR:
          had_err = true;
          exitNow = true;
          break;
        case OPTIONPARSER_REQUESTS_EXIT:
          exitNow = true;
          break;
      }
      if (exitNow)
        break;
    }
    gt_str_set(inputProject, argv[parsedArgs]);

    verbosity = newverboseinfo(params.verboseOutput);

    bwtSeq = availBWTSeq(&params.idx.final, verbosity, err);
    if ((had_err = bwtSeq == NULL))
      break;

    {
      enum verifyBWTSeqErrCode retval =
        BWTSeqVerifyIntegrity(bwtSeq, inputProject, params.flags,
                              params.progressInterval, stderr, verbosity,
                              err);
      if ((had_err = (retval != VERIFY_BWTSEQ_NO_ERROR)))
      {
        fprintf(stderr, "index integrity check failed: %s\n",
                gt_error_get(err));
        gt_error_set(err, "aborted because of index integrity check fail");
        break;
      }
    }
    if (BWTSeqHasLocateInformation(bwtSeq))
    {
      if ((had_err = !initEmptyEMIterator(&EMIter, bwtSeq)))
      {
        gt_error_set(err,
                     "Cannot create matches iterator for sequence index.");
        break;
      }
      EMIterInitialized = true;
    }
    {
      Seqpos totalLen, dbstart;
      unsigned long trial, patternLen;

      if ((had_err =
           mapsuffixarray(&suffixarray, SARR_SUFTAB | SARR_ESQTAB,
                          inputProject, NULL, err) != 0))
      {
        gt_error_set(err, "Can't load suffix array project with"
                     " demand for encoded sequence and suffix table files\n");
        break;
      }
      totalLen = getencseqtotallength(suffixarray.encseq);
      saIsLoaded = true;
      if ((had_err = (params.minPatLen >= 0L && params.maxPatLen >= 0L
                      && params.minPatLen > params.maxPatLen)))
      {
        gt_error_set(err, "Invalid pattern lengths selected: min=%ld, max=%ld;"
                     " min <= max is required.", params.minPatLen,
                     params.maxPatLen);
        break;
      }
      if (params.minPatLen < 0 || params.maxPatLen < 0)
      {
        unsigned int numofchars
          = getencseqAlphabetnumofchars(suffixarray.encseq);
        if (params.minPatLen < 0)
          params.minPatLen = recommendedprefixlength(numofchars, totalLen);
        if (params.maxPatLen < 0)
          params.maxPatLen =
            MAX(params.minPatLen,
                125 * recommendedprefixlength(numofchars, totalLen) / 100);
        else
          params.maxPatLen = MAX(params.maxPatLen, params.minPatLen);
      }
      /* both lengths are non-negative from here on; the cast makes the
         arguments match the %lu conversions */
      fprintf(stderr, "Using patterns of lengths %lu to %lu\n",
              (unsigned long) params.minPatLen,
              (unsigned long) params.maxPatLen);
      if ((had_err = totalLen + 1 != BWTSeqLength(bwtSeq)))
      {
        gt_error_set(err, "base suffix array and index have different "
                     "lengths! " FormatSeqpos " vs. " FormatSeqpos,
                     PRINTSeqposcast(totalLen + 1),
                     PRINTSeqposcast(BWTSeqLength(bwtSeq)));
        break;
      }
      if ((had_err =
           (epi = newenumpatterniterator(params.minPatLen, params.maxPatLen,
                                         suffixarray.encseq,
                                         err)) == NULL))
      {
        fputs("Creation of pattern iterator failed!\n", stderr);
        break;
      }
      for (trial = 0; !had_err && trial < params.numOfSamples; ++trial)
      {
        const GtUchar *pptr = nextEnumpatterniterator(&patternLen, epi);
        MMsearchiterator *mmsi =
          newmmsearchiterator(suffixarray.encseq,
                              suffixarray.suftab,
                              0,  /* leftbound */
                              totalLen, /* rightbound */
                              0, /* offset */
                              suffixarray.readmode,
                              pptr,
                              patternLen);
        if (BWTSeqHasLocateInformation(bwtSeq))
        {
          Seqpos numMatches;
          if ((had_err = !reinitEMIterator(&EMIter, bwtSeq, pptr, patternLen,
                                           false)))
          {
            fputs("Internal error: failed to reinitialize pattern match"
                  " iterator", stderr);
            abort();
          }
          numMatches = EMINumMatchesTotal(&EMIter);
          gt_assert(numMatches == BWTSeqMatchCount(bwtSeq, pptr, patternLen,
                                                   false));
          gt_assert(EMINumMatchesTotal(&EMIter)
                    == countmmsearchiterator(mmsi));
          while (nextmmsearchiterator(&dbstart,mmsi))
          {
            Seqpos matchPos = 0;
            bool match = EMIGetNextMatch(&EMIter, &matchPos, bwtSeq);
            if ((had_err = !match))
            {
              gt_error_set(err,
                           "matches of packedindex expired before mmsearch!");
              break;
            }
            if ((had_err = matchPos != dbstart))
            {
              gt_error_set(err, "packedindex match doesn't equal mmsearch "
                           "match result!\n" FormatSeqpos " vs. "
                           FormatSeqpos "\n",
                           PRINTSeqposcast(matchPos),
                           PRINTSeqposcast(dbstart));
              /* stop here: the next iteration would overwrite had_err */
              break;
            }
          }
          if (!had_err)
          {
            Seqpos matchPos;
            bool trailingMatch = EMIGetNextMatch(&EMIter, &matchPos, bwtSeq);
            if ((had_err = trailingMatch))
            {
              gt_error_set(err,
                           "matches of mmsearch expired before fmindex!");
            }
          }
        }
        else
        {
          Seqpos numFMIMatches = BWTSeqMatchCount(bwtSeq, pptr, patternLen,
                                                  false),
            numMMSearchMatches = countmmsearchiterator(mmsi);
          if ((had_err = numFMIMatches != numMMSearchMatches))
          {
            gt_error_set(err, "Number of matches not equal for suffix array ("
                         FormatSeqpos ") and fmindex (" FormatSeqpos ").\n",
                         PRINTSeqposcast(numMMSearchMatches),
                         PRINTSeqposcast(numFMIMatches));
          }
        }
        /* release the iterator on every path before leaving the trial */
        freemmsearchiterator(&mmsi);
        if (had_err)
          break;
        if (params.progressInterval && !((trial + 1) % params.progressInterval))
          putc('.', stderr);
      }
      if (params.progressInterval)
        putc('\n', stderr);
      fprintf(stderr, "Finished %lu of %lu matchings successfully.\n",
              trial, (unsigned long) params.numOfSamples);
    }
  } while (0);
  if (EMIterInitialized) destructEMIterator(&EMIter);
  if (saIsLoaded) freesuffixarray(&suffixarray);
  if (epi) freeEnumpatterniterator(&epi);
  if (bwtSeq) deleteBWTSeq(bwtSeq);
  if (verbosity) freeverboseinfo(&verbosity);
  if (inputProject) gt_str_delete(inputProject);
  return had_err?-1:0;
}