/*
 * Enumerate the k-mers of <encseq>, read in <readmode>, in increasing order
 * of their start position and pass each of them to <processkmercode>.
 *
 * encseq               sequence to scan
 * readmode             read mode handed to the sequence reader
 * kmersize             window length k
 * processkmercode      callback, invoked as
 *                      processkmercode(processkmercodeinfo, position, code)
 * processkmercodeinfo  opaque pointer passed through to the callback
 *
 * Nothing is reported when the sequence is shorter than <kmersize>. After
 * the last complete window, <kmersize> additional windows are reported in
 * which WILDCARD symbols are shifted in from the right.
 */
void getencseqkmers(const GtEncseq *encseq,
                    GtReadmode readmode,
                    unsigned int kmersize,
                    void(*processkmercode)(void *,
                                           unsigned long,
                                           const GtKmercode *),
                    void *processkmercodeinfo)
{
  const unsigned long seqlen = gt_encseq_total_length(encseq);
  unsigned long pos = 0;
  unsigned int alphasize, padded = 0;
  Kmerstream *spwp;
  GtEncseqReader *esr;
  GtUchar cc;

  if (seqlen < (unsigned long) kmersize) {
    return;
  }
  alphasize = gt_alphabet_num_of_chars(gt_encseq_alphabet(encseq));
  spwp = kmerstream_new(alphasize,kmersize);
  esr = gt_encseq_create_reader_with_readmode(encseq,readmode,0);

  /* load the first kmersize symbols into the still empty window */
  while (pos < (unsigned long) kmersize) {
    cc = gt_encseq_reader_next_encoded_char(esr);
    GT_CHECKENCCHAR(cc,encseq,pos,readmode);
    spwp->windowwidth++;
    updatespecialpositions(spwp,cc,false,0);
    spwp->cyclicwindow[spwp->windowwidth-1] = cc;
    pos++;
  }
  kmerstream_newcode(&spwp->currentkmercode,spwp);
  processkmercode(processkmercodeinfo,0,&spwp->currentkmercode);

  /* slide the window one symbol at a time over the rest of the sequence */
  while (pos < seqlen) {
    cc = gt_encseq_reader_next_encoded_char(esr);
    GT_CHECKENCCHAR(cc,encseq,pos,readmode);
    shiftrightwithchar(spwp,cc);
    kmerstream_newcode(&spwp->currentkmercode,spwp);
    processkmercode(processkmercodeinfo,
                    pos + 1 - spwp->kmersize,
                    &spwp->currentkmercode);
    pos++;
  }
  gt_encseq_reader_delete(esr);

  /* pos now equals the sequence length: pad with wildcards on the right */
  while (padded < kmersize) {
    shiftrightwithchar(spwp,(GtUchar) WILDCARD);
    kmerstream_newcode(&spwp->currentkmercode,spwp);
    processkmercode(processkmercodeinfo,
                    padded + pos + 1 - spwp->kmersize,
                    &spwp->currentkmercode);
    padded++;
  }
  kmerstream_delete(spwp);
}
/*
 * Extend a match to the right and return the number of additional matching
 * positions.
 *
 * The database side starts at position <dbend> of <dbencseq> (read through
 * <esr> in <readmode>), the query side at
 * querysubstring->currentoffset + <matchlength>. Extension stops at the
 * first special database symbol, at the first mismatch, or when either the
 * database (<totallength>) or the query (queryrep->seqlen) is exhausted.
 * The reader is only repositioned when <dbend> lies inside the database.
 */
static GtUword gt_mmsearch_extendright(const GtEncseq *dbencseq,
                                       GtEncseqReader *esr,
                                       GtReadmode readmode,
                                       GtUword totallength,
                                       GtUword dbend,
                                       const GtQuerysubstring *querysubstring,
                                       GtUword matchlength)
{
  GtUword extension = 0,
          querypos = querysubstring->currentoffset + matchlength;

  if (dbend < totallength) {
    gt_encseq_reader_reinit_with_readmode(esr,dbencseq,readmode,dbend);
  }
  while (dbend + extension < totallength &&
         querypos < querysubstring->queryrep->seqlen) {
    const GtUchar dbchar = gt_encseq_reader_next_encoded_char(esr);

    if (ISSPECIAL(dbchar)) {
      break;
    }
    if (dbchar != gt_mmsearch_accessquery(querysubstring->queryrep,
                                          querypos)) {
      break;
    }
    extension++;
    querypos++;
  }
  return extension;
}
/*
 * Advance <kmercodeiterator> to the next k-mer that contains no special
 * position (kmercode.definedspecialposition is not set) and return a
 * pointer to its code, which is stored inside the iterator.
 *
 * Returns NULL once the end of the sequence has been reached, and also
 * when the iterator is flagged as exhausted: an iterator created over a
 * sequence shorter than the k-mer size has spwp == NULL, so it must not be
 * dereferenced here.
 */
const GtKmercode *gt_kmercodeiterator_encseq_nonspecial_next(
                            GtKmercodeiterator *kmercodeiterator)
{
  if (kmercodeiterator->inputexhausted) {
    return NULL;
  }
  if (!kmercodeiterator->hasprocessedfirst) {
    /* the first window was already filled when the iterator was set up */
    gt_assert(kmercodeiterator->currentposition
              == kmercodeiterator->startpos +
                 (unsigned long) kmercodeiterator->spwp->kmersize);
    kmerstream_newcode(&kmercodeiterator->kmercode,
                       kmercodeiterator->spwp);
    kmercodeiterator->hasprocessedfirst = true;
    if (!kmercodeiterator->kmercode.definedspecialposition) {
      return &kmercodeiterator->kmercode;
    }
  }
  /* shift in one symbol at a time until a window without specials shows up */
  while (kmercodeiterator->currentposition
         < kmercodeiterator->totallength) {
    GtUchar charcode
      = gt_encseq_reader_next_encoded_char(kmercodeiterator->esr);

    shiftrightwithchar(kmercodeiterator->spwp,charcode);
    kmerstream_newcode(&kmercodeiterator->kmercode,
                       kmercodeiterator->spwp);
    kmercodeiterator->currentposition++;
    if (!kmercodeiterator->kmercode.definedspecialposition) {
      return &kmercodeiterator->kmercode;
    }
  }
  return NULL;
}
/*
 * Return the encoded character at relative position <pos> of <seq>.
 *
 * Three access paths exist, tried in this order:
 *  1. a two-bit encoding, addressed directly,
 *  2. a sequential reader backed by a cache that grows in steps of 256
 *     characters and is addressed through cache_ptr, a pointer biased by
 *     cache_offset so that it can be indexed with <pos> itself,
 *  3. random access into the encoded sequence.
 * For paths 1 and 3 the absolute position is startpos + pos when reading
 * forward and startpos - pos otherwise.
 */
static GtUchar sequenceobject_get_char(Sequenceobject *seq,GtUword pos)
{
  const GtUword addamount = 256UL;

  if (seq->twobitencoding != NULL) {
    return gt_twobitencoding_char_at_pos(seq->twobitencoding,
                                         seq->forward ? seq->startpos + pos
                                                      : seq->startpos - pos);
  }
  if (seq->encseqreader == NULL) {
    gt_assert(seq->encseq != NULL);
    gt_assert(seq->forward || seq->startpos >= pos);
    return gt_encseq_get_encoded_char(seq->encseq,
                                      seq->forward ? seq->startpos + pos
                                                   : seq->startpos - pos,
                                      GT_READMODE_FORWARD);
  }

  /* Positions below min_access_pos are not needed any more: once at least
     addamount of them have piled up, move the live part of the cache to
     the front of the buffer and rebase the biased pointer. */
  if (seq->min_access_pos != GT_UWORD_MAX &&
      seq->min_access_pos >= seq->cache_offset + addamount) {
    const GtUword limit = MIN(seq->cache_num_positions,seq->substringlength);
    GtUchar *rebased = ((GtUchar *) seq->sequence_cache->space)
                       - seq->min_access_pos;
    GtUword src = seq->min_access_pos;

    while (src < limit) {
      rebased[src] = seq->cache_ptr[src];
      src++;
    }
    seq->cache_offset = seq->min_access_pos;
    seq->cache_ptr = rebased;
  }

  /* Requested position not cached yet: read the next chunk. */
  if (pos >= seq->cache_num_positions) {
    const GtUword fill_end = MIN(seq->cache_num_positions + addamount,
                                 seq->substringlength);
    GtUword dst;

    if (fill_end > seq->cache_offset + seq->sequence_cache->allocated) {
      seq->sequence_cache->allocated += addamount;
      seq->sequence_cache->space
        = gt_realloc(seq->sequence_cache->space,
                     sizeof (GtUchar) * seq->sequence_cache->allocated);
      /* the buffer may have moved, so the biased pointer must follow */
      seq->cache_ptr = ((GtUchar *) seq->sequence_cache->space)
                       - seq->cache_offset;
    }
    gt_assert(pos >= seq->cache_offset);
    for (dst = seq->cache_num_positions; dst < fill_end; dst++) {
      seq->cache_ptr[dst]
        = gt_encseq_reader_next_encoded_char(seq->encseqreader);
    }
    seq->cache_num_positions = fill_end;
  }
  gt_assert(pos < seq->cache_offset + seq->sequence_cache->allocated);
  gt_assert(seq->cache_ptr != NULL);
  return seq->cache_ptr[pos];
}
/*
 * Advance <kmercodeiterator> by one position and return a pointer to the
 * code of the current k-mer, which is stored inside the iterator.
 *
 * The first call reports the window that was filled when the iterator was
 * set up. Later calls shift in the next sequence symbol; past the end of
 * the sequence, kmersize further windows are produced by shifting in
 * WILDCARD symbols. After that NULL is returned.
 *
 * NULL is also returned when the iterator is flagged as exhausted: an
 * iterator created over a sequence shorter than the k-mer size has
 * spwp == NULL, so it must not be dereferenced here.
 */
const GtKmercode *gt_kmercodeiterator_encseq_next(
                            GtKmercodeiterator *kmercodeiterator)
{
  if (kmercodeiterator->inputexhausted) {
    return NULL;
  }
  if (!kmercodeiterator->hasprocessedfirst) {
    gt_assert(kmercodeiterator->currentposition
              == kmercodeiterator->startpos +
                 (unsigned long) kmercodeiterator->spwp->kmersize);
    kmercodeiterator->hasprocessedfirst = true;
  } else if (kmercodeiterator->currentposition
             < kmercodeiterator->totallength) {
    GtUchar charcode
      = gt_encseq_reader_next_encoded_char(kmercodeiterator->esr);

    shiftrightwithchar(kmercodeiterator->spwp,charcode);
    kmercodeiterator->currentposition++;
  } else if (kmercodeiterator->currentposition
             < kmercodeiterator->totallength +
               kmercodeiterator->spwp->kmersize) {
    /* overshoot: pad the window with wildcards */
    shiftrightwithchar(kmercodeiterator->spwp,(GtUchar) WILDCARD);
    kmercodeiterator->currentposition++;
  } else {
    return NULL;
  }
  kmerstream_newcode(&kmercodeiterator->kmercode,kmercodeiterator->spwp);
  return &kmercodeiterator->kmercode;
}
/*
 * Lua binding: read the next encoded character from the encseq reader
 * given as first argument and push it onto the Lua stack as a number.
 * Returns 1, the number of results left on the stack.
 */
static int encseq_reader_lua_next_encoded_char(lua_State *L)
{
  GtEncseqReader **reader = check_encseq_reader(L, 1);
  const unsigned char cc = gt_encseq_reader_next_encoded_char(*reader);

  lua_pushnumber(L, cc);
  return 1;
}
/*
 * Create a k-mer code iterator over <encseq>, read in <readmode>, starting
 * at position <startpos> with window length <kmersize>.
 *
 * A reverse read mode requires startpos == 0, and startpos must lie inside
 * the sequence (both are asserted).
 *
 * If fewer than <kmersize> symbols remain from <startpos>, the iterator is
 * returned in the exhausted state (inputexhausted set, esr and spwp NULL).
 * Otherwise the first window is read so that currentposition ends up at
 * startpos + kmersize.
 *
 * The bookkeeping members readmode, hasprocessedfirst and currentposition
 * are set on both paths so that no member of the freshly allocated
 * structure is read while still uninitialised.
 */
/*@notnull@*/ GtKmercodeiterator *gt_kmercodeiterator_encseq_new(
                                            const GtEncseq *encseq,
                                            GtReadmode readmode,
                                            unsigned int kmersize,
                                            unsigned long startpos)
{
  GtKmercodeiterator *kmercodeiterator;
  unsigned int numofchars;
  GtUchar charcode;

  gt_assert(!GT_ISDIRREVERSE(readmode) || startpos == 0);
  kmercodeiterator = gt_malloc(sizeof (*kmercodeiterator));
  kmercodeiterator->totallength = gt_encseq_total_length(encseq);
  kmercodeiterator->startpos = startpos;
  gt_assert(startpos < kmercodeiterator->totallength);

  /* state shared by the exhausted and the regular case */
  kmercodeiterator->fb = NULL;
  kmercodeiterator->encseq = encseq;
  kmercodeiterator->readmode = readmode;
  kmercodeiterator->hasprocessedfirst = false;
  kmercodeiterator->currentposition = startpos;

  if (kmercodeiterator->totallength - startpos < (unsigned long) kmersize) {
    kmercodeiterator->inputexhausted = true;
    kmercodeiterator->esr = NULL;
    kmercodeiterator->spwp = NULL;
    return kmercodeiterator;
  }

  kmercodeiterator->inputexhausted = false;
  kmercodeiterator->esr
    = gt_encseq_create_reader_with_readmode(encseq,readmode,startpos);
  numofchars = gt_alphabet_num_of_chars(gt_encseq_alphabet(encseq));
  kmercodeiterator->spwp = kmerstream_new(numofchars,kmersize);

  /* fill the first window */
  while (kmercodeiterator->currentposition
         < startpos + (unsigned long) kmersize) {
    charcode = gt_encseq_reader_next_encoded_char(kmercodeiterator->esr);
    kmercodeiterator->spwp->windowwidth++;
    updatespecialpositions(kmercodeiterator->spwp,charcode,false,0);
    kmercodeiterator->spwp->cyclicwindow[kmercodeiterator->
                                         spwp->windowwidth-1] = charcode;
    kmercodeiterator->currentposition++;
  }
  return kmercodeiterator;
}
/*
 * Write the <wlen> symbols of <encseq> starting at position <start>, read
 * in <readmode>, to <fpout> in their printable form.
 *
 * A line break is emitted after every <width> symbols (width must be
 * positive) and after the last symbol. A SEPARATOR is written as a line
 * consisting of '>' and restarts the column count.
 *
 * Nothing is written for wlen == 0. Without that guard the last position
 * start + wlen - 1 wraps around, the loop never reaches its exit condition
 * and reads beyond the requested range.
 */
void gt_encseq2symbolstring(FILE *fpout,
                            const GtEncseq *encseq,
                            GtReadmode readmode,
                            unsigned long start,
                            unsigned long wlen,
                            unsigned long width)
{
  unsigned long column = 0, idx, lastpos;
  GtUchar currentchar;
  GtEncseqReader *esr;
  const GtAlphabet *alpha;

  gt_assert(width > 0);
  if (wlen == 0) {
    return;
  }
  esr = gt_encseq_create_reader_with_readmode(encseq, readmode, start);
  lastpos = start + wlen - 1;
  alpha = gt_encseq_alphabet(encseq);
  for (idx = start; /* Nothing */ ; idx++) {
    currentchar = gt_encseq_reader_next_encoded_char(esr);
    if (currentchar == (GtUchar) SEPARATOR) {
      fprintf(fpout,"\n>\n");
      column = 0;
    } else {
      gt_alphabet_echo_pretty_symbol(alpha,fpout,currentchar);
    }
    if (idx == lastpos) {
      fprintf(fpout,"\n");
      break;
    }
    if (currentchar != (GtUchar) SEPARATOR) {
      column++;
      if (column >= width) {
        fprintf(fpout,"\n");
        column = 0;
      }
    }
  }
  gt_encseq_reader_delete(esr);
}
/*
 * Reposition an existing k-mer code iterator at <startpos> of its sequence,
 * read in <readmode>, reusing its reader and k-mer stream.
 *
 * A reverse read mode requires startpos == 0, and startpos must lie inside
 * the sequence (both are asserted).
 *
 * If fewer than kmersize symbols remain from <startpos>, the reader and the
 * k-mer stream are deleted and the iterator is flagged as exhausted.
 * Otherwise the first window is read so that currentposition ends up at
 * startpos + kmersize.
 *
 * The k-mer size is taken from kmercodeiterator->spwp, which is
 * dereferenced unconditionally, and the reader is reinitialised in place:
 * both must therefore be non-NULL on entry, which is not the case once the
 * exhausted branch below has run.
 */
void gt_kmercodeiterator_reset(GtKmercodeiterator *kmercodeiterator,
                               GtReadmode readmode,
                               GtUword startpos)
{
  const GtEncseq *encseq = kmercodeiterator->encseq;
  GtUword kmersize = (GtUword) kmercodeiterator->spwp->kmersize;
  GtUchar charcode;

  gt_assert(!GT_ISDIRREVERSE(readmode) || startpos == 0);
  kmercodeiterator->totallength = gt_encseq_total_length(encseq);
  kmercodeiterator->startpos = startpos;
  gt_assert(startpos < kmercodeiterator->totallength);
  kmercodeiterator->fb = NULL;

  if (kmercodeiterator->totallength - startpos < kmersize) {
    /* not enough symbols for a single window */
    kmercodeiterator->inputexhausted = true;
    gt_encseq_reader_delete(kmercodeiterator->esr);
    kmercodeiterator->esr = NULL;
    kmerstream_delete(kmercodeiterator->spwp);
    kmercodeiterator->spwp = NULL;
    return;
  }

  kmercodeiterator->inputexhausted = false;
  kmercodeiterator->readmode = readmode;
  gt_encseq_reader_reinit_with_readmode(kmercodeiterator->esr,
                                        encseq,
                                        readmode,
                                        startpos);
  kmerstream_reset(kmercodeiterator->spwp);
  kmercodeiterator->hasprocessedfirst = false;

  /* fill the first window */
  kmercodeiterator->currentposition = startpos;
  while (kmercodeiterator->currentposition
         < startpos + (GtUword) kmersize) {
    charcode = gt_encseq_reader_next_encoded_char(kmercodeiterator->esr);
    kmercodeiterator->spwp->windowwidth++;
    kmerstream_updatespecialpositions(kmercodeiterator->spwp,charcode,
                                      false,0);
    kmercodeiterator->spwp->cyclicwindow[kmercodeiterator->
                                         spwp->windowwidth-1] = charcode;
    kmercodeiterator->currentposition++;
  }
}
/*
 * Fill the bit vector of the wavelet tree <we>, one level at a time.
 *
 * For every level the complete encoded sequence is read once. Each symbol
 * is mapped with gt_wtree_encseq_map() and located with
 * gt_wtree_encseq_set_nodestart_and_current_fo(); when that call returns
 * non-zero the bit stays 0 and the node's left_size is incremented,
 * otherwise the bit at node_start + offset is set. Symbols for which
 * current_fo is NULL afterwards contribute nothing on that level.
 *
 * On return the fill-offset structures and the encoded sequence reference
 * held by <we> have been released and the corresponding members set to
 * NULL.
 */
static void gt_wtree_encseq_fill_bits(GtWtreeEncseq *we)
{
  unsigned int level_idx;
  GtUword sym_idx;
  GtEncseqReader *er;

  /* check before the first dereference of we */
  gt_assert(we != NULL);
  er = gt_encseq_create_reader_with_readmode(we->encseq,
                                             GT_READMODE_FORWARD, 0);

  for (level_idx = 0; level_idx < we->levels; level_idx++) {
    for (sym_idx = 0;
         sym_idx < we->parent_instance.members->length;
         sym_idx++) {
      GtWtreeSymbol c_sym =
        gt_wtree_encseq_map(we, gt_encseq_reader_next_encoded_char(er));

      if (gt_wtree_encseq_set_nodestart_and_current_fo(we, level_idx,
                                                       c_sym)) {
        /* 0-bit: nothing to set, only count the symbol on the left */
        if (we->current_fo != NULL) {
          gt_assert(we->node_start + we->current_fo->offset
                    < we->num_of_bits);
          we->current_fo->offset++;
          we->current_fo->left_size++;
        }
      } else {
        /* 1-bit */
        if (we->current_fo != NULL) {
          gt_assert(we->node_start + we->current_fo->offset
                    < we->num_of_bits);
          GT_SETIBIT(we->bits, we->node_start + we->current_fo->offset);
          we->current_fo->offset++;
        }
      }
    }
    /* rewind for the next level */
    gt_encseq_reader_reinit_with_readmode(er, we->encseq,
                                          GT_READMODE_FORWARD, 0);
  }
  gt_encseq_reader_delete(er);
  gt_wtree_encseq_fill_offset_delete(we->root_fo);
  we->root_fo = we->current_fo = NULL;
  gt_encseq_delete(we->encseq);
  we->encseq = NULL;
}
void gt_lookaheadsearchPSSM(const GtEncseq *encseq, const Profilematrix *prof) { unsigned long firstpos, bufsize; GtUchar currentchar; unsigned long pos; GtEncseqReader *esr; unsigned long totallength = gt_encseq_total_length(encseq); GtUchar *buffer; esr = gt_encseq_create_reader_with_readmode(encseq,GT_READMODE_FORWARD,0); buffer = gt_malloc(sizeof *buffer * prof->dimension); firstpos = bufsize = 0; for (pos=0; pos < totallength; pos++) { currentchar = gt_encseq_reader_next_encoded_char(esr); if (ISSPECIAL(currentchar)) { bufsize = firstpos = 0; } else { if (bufsize < prof->dimension) { buffer[bufsize++] = currentchar; } else { buffer[firstpos++] = currentchar; if (firstpos == prof->dimension) { firstpos = 0; } } } } gt_encseq_reader_delete(esr); gt_free(buffer); }
/*
 * Approximate pattern matching with a bit-vector edit distance
 * computation.
 *
 * The database sequence of <mor> is scanned in reverse read mode. For each
 * symbol one column of the edit distance table is updated using the bit
 * vectors Pv/Mv (vertical +1/-1 differences) and Ph/Mh (horizontal
 * differences); <score> tracks the value in the last row, selected by
 * Ebit. A SEPARATOR resets the column to its initial state, a WILDCARD
 * matches no pattern position.
 *
 * Whenever score <= maxdistance, the position is mapped back to forward
 * coordinates, the match length is determined (by
 * gt_forwardprefixmatch() if maxdistance > 0, otherwise it is
 * patternlength) and, if defined, the match is passed to
 * mor->processmatch.
 *
 * The pattern must fit into one machine word: Ebit is
 * 1UL << (patternlength-1).
 */
void gt_edistmyersbitvectorAPM(Myersonlineresources *mor,
                               const GtUchar *pattern,
                               GtUword patternlength,
                               GtUword maxdistance)
{
  const GtUword Ebit = 1UL << (patternlength-1);
  const GtReadmode readmode = GT_READMODE_REVERSE;
  GtUword Pv = ~0UL, Mv = 0UL, score = patternlength, pos;
  GtIdxMatch match;

  gt_initeqsvectorrev(mor->eqsvectorrev,
                      (GtUword) mor->alphasize,
                      pattern,patternlength);
  gt_encseq_reader_reinit_with_readmode(mor->esr, mor->encseq, readmode, 0);

  /* members that are identical for every reported match */
  match.dbabsolute = NULL;
  match.dbsubstring = NULL;
  match.querystartpos = 0;
  match.querylen = patternlength;
  match.alignment = NULL;

  for (pos = 0; pos < mor->totallength; pos++) {
    GtUword Eq, Xv, Xh, Ph, Mh, dbstartpos;
    Definedunsignedlong matchlength;
    const GtUchar cc = gt_encseq_reader_next_encoded_char(mor->esr);

    if (cc == (GtUchar) SEPARATOR) {
      /* sequence boundary: start over with the initial column */
      Pv = ~0UL;
      Mv = 0UL;
      score = patternlength;
      continue;
    }
    Eq = (cc == (GtUchar) WILDCARD) ? 0
                                    : mor->eqsvectorrev[(GtUword) cc];

    /* column update; the order of these statements matters */
    Xv = Eq | Mv;
    Xh = (((Eq & Pv) + Pv) ^ Pv) | Eq;
    Ph = Mv | ~ (Xh | Pv);
    Mh = Pv & Xh;
    if (Ph & Ebit) {
      score++;
    } else if (Mh & Ebit) {
      gt_assert(score > 0);
      score--;
    }
    Ph <<= 1;
    Pv = (Mh << 1) | ~ (Xv | Ph);
    Mv = Ph & Xv;

    if (score > maxdistance) {
      continue;
    }
    dbstartpos = GT_REVERSEPOS(mor->totallength,pos);
    if (maxdistance > 0) {
      matchlength = gt_forwardprefixmatch(mor->encseq,
                                          mor->alphasize,
                                          dbstartpos,
                                          mor->nowildcards,
                                          mor->eqsvector,
                                          pattern,
                                          patternlength,
                                          maxdistance);
    } else {
      matchlength.defined = true;
      matchlength.valueunsignedlong = patternlength;
    }
    gt_assert(matchlength.defined || mor->nowildcards);
    if (matchlength.defined) {
      match.dbstartpos = dbstartpos;
      match.dblen = (GtUword) matchlength.valueunsignedlong;
      match.distance = score;
      mor->processmatch(mor->processmatchinfo,&match);
    }
  }
}
/*
 * Compute one GC value per sequence of the DNA sequence collection
 * <encseq> and return them in a newly allocated array (gt_calloc) that is
 * handed over to the caller.
 *
 * with_special  passed through to calculate_gc()
 * calculate     if true, the entry of each sequence is filled by
 *               calculate_gc() from its G/C and A/T counts; if false, the
 *               entry is the plain G/C count
 * err           unused
 *
 * For a mirrored encseq the array has twice as many entries as the first
 * half holds sequences; the second half is finally overwritten with the
 * values of the first half in reverse order.
 */
double *gt_encseq_get_gc(const GtEncseq *encseq,
                         bool with_special,
                         bool calculate,
                         GT_UNUSED GtError *err)
{
  GtEncseqReader *reader;
  GtAlphabet *alphabet;
  double *gc_content;
  unsigned long char_idx, totallength, max_unit,
                seq_idx = 0, nextsep,
                at_count = 0, gc_count = 0;
  bool is_mirrored_encseq;
  GtUchar acgt[8], current_c;

  alphabet = gt_encseq_alphabet(encseq);
  gt_assert(gt_alphabet_is_dna(alphabet));
  /* acgt[0..3] are the codes of a/A/t/T, acgt[4..7] those of c/C/g/G */
  gt_alphabet_encode_seq(alphabet, acgt, "aAtTcCgG", 8UL);
  totallength = gt_encseq_total_length(encseq);
  reader = gt_encseq_create_reader_with_readmode(encseq,
                                                 GT_READMODE_FORWARD,
                                                 0);
  is_mirrored_encseq = gt_encseq_is_mirrored(encseq);
  if (is_mirrored_encseq) {
    max_unit = GT_DIV2(gt_encseq_num_of_sequences(encseq));
    gc_content = gt_calloc((size_t) GT_MULT2(max_unit), sizeof (double));
  } else {
    max_unit = gt_encseq_num_of_sequences(encseq);
    gc_content = gt_calloc((size_t) max_unit, sizeof (double));
  }

  /* position of the separator that follows the current sequence */
  nextsep = gt_encseq_seqstartpos(encseq, seq_idx) +
            gt_encseq_seqlength(encseq, seq_idx);

  for (char_idx = 0; char_idx < totallength; char_idx++) {
    if (nextsep == char_idx) {
      /* end of a sequence: store its value and move on to the next one */
      if (calculate) {
        calculate_gc(encseq, gc_content, with_special, seq_idx,
                     gc_count, at_count);
      } else {
        gc_content[seq_idx] = (double) gc_count;
      }
      seq_idx++;
      nextsep = gt_encseq_seqstartpos(encseq, seq_idx) +
                gt_encseq_seqlength(encseq, seq_idx);
      /* the separator itself is not read: continue right behind it */
      gt_encseq_reader_reinit_with_readmode(reader, encseq,
                                            GT_READMODE_FORWARD,
                                            char_idx + 1UL);
      gc_count = at_count = 0UL;
      continue;
    }
    current_c = gt_encseq_reader_next_encoded_char(reader);
    if (current_c == acgt[0] || current_c == acgt[1] ||
        current_c == acgt[2] || current_c == acgt[3]) {
      at_count++;
    } else if (current_c == acgt[4] || current_c == acgt[5] ||
               current_c == acgt[6] || current_c == acgt[7]) {
      gc_count++;
    }
    /* any other symbol is not counted */
  }

  /* the last sequence is not followed by a separator */
  if (calculate) {
    calculate_gc(encseq, gc_content, with_special, seq_idx,
                 gc_count, at_count);
  } else {
    gc_content[seq_idx] = (double) gc_count;
  }
  gt_encseq_reader_delete(reader);

  if (is_mirrored_encseq) {
    unsigned long double_max_unit = GT_MULT2(max_unit);

    for (seq_idx = 0; seq_idx < max_unit; seq_idx++) {
      gc_content[double_max_unit - seq_idx - 1] = gc_content[seq_idx];
    }
  }
  return gc_content;
}
/*
 * Check the BWT sequence index <bwtSeq> against the reference suffix array
 * project <projectName>.
 *
 * checkFlags selects the checks to run:
 *   VERIFY_BWTSEQ_SUFVAL     compare every locate-able position with the
 *                            reference suffix table
 *   VERIFY_BWTSEQ_LFMAPWALK  regenerate the sequence backwards via LF
 *                            mapping and compare it with the encoded
 *                            sequence
 *   VERIFY_BWTSEQ_CONTEXT    compare randomly chosen substrings obtained
 *                            from the context retriever with the encoded
 *                            sequence
 * tickPrint  if non-zero, a '.' is written to <fp> after every tickPrint
 *            positions of the suffix value check
 * verbosity  passed on to gt_mapsuffixarray()
 * err        receives a message describing the first failure
 *
 * Returns VERIFY_BWTSEQ_NO_ERROR on success, otherwise the code of the
 * first failed check. Progress notes are written to stderr.
 */
enum verifyBWTSeqErrCode
gt_BWTSeqVerifyIntegrity(BWTSeq *bwtSeq, const char *projectName,
                         int checkFlags, GtUword tickPrint, FILE *fp,
                         GtLogger *verbosity, GtError *err)
{
  Suffixarray suffixArray;
  struct extBitsRetrieval extBits;
  bool suffixArrayIsInitialized = false, extBitsAreInitialized = false;
  enum verifyBWTSeqErrCode retval = VERIFY_BWTSEQ_NO_ERROR;

  /* single-pass loop: every break leads to the common cleanup below */
  do {
    GtUword seqLen;

    gt_assert(bwtSeq && projectName && err);
    gt_error_check(err);

    initExtBitsRetrieval(&extBits);
    extBitsAreInitialized = true;

    if (gt_mapsuffixarray(&suffixArray,
                          SARR_SUFTAB | SARR_ESQTAB,
                          projectName, verbosity, err)) {
      gt_error_set(err, "Cannot load reference suffix array project with"
                   " demand for suffix table file and encoded sequence"
                   " for project: %s", projectName);
      retval = VERIFY_BWTSEQ_REFLOAD_ERROR;
      break;
    }
    suffixArrayIsInitialized = true;

    /* one more than the encoded sequence: the terminator symbol */
    seqLen = gt_encseq_total_length(suffixArray.encseq) + 1;
    if (BWTSeqLength(bwtSeq) != seqLen) {
      gt_error_set(err, "length mismatch for suffix array project %s and "
                   "bwt sequence index", projectName);
      retval = VERIFY_BWTSEQ_LENCOMPARE_ERROR;
      break;
    }

    /* ---- suffix array values ---- */
    if ((checkFlags & VERIFY_BWTSEQ_SUFVAL)
        && BWTSeqHasLocateInformation(bwtSeq)) {
      GtUword i;

      for (i = 0; i < seqLen && retval == VERIFY_BWTSEQ_NO_ERROR; ++i) {
        if (gt_BWTSeqPosHasLocateInfo(bwtSeq, i, &extBits)) {
          GtUword sfxArrayValue = gt_BWTSeqLocateMatch(bwtSeq, i, &extBits);

          if (sfxArrayValue != ESASUFFIXPTRGET(suffixArray.suftab,i)) {
            gt_error_set(err, "Failed suffix array value comparison"
                         " at position " GT_WU ": " GT_WU " != " GT_WU,
                         i, sfxArrayValue,
                         ESASUFFIXPTRGET(suffixArray.suftab,i));
            retval = VERIFY_BWTSEQ_SUFVAL_ERROR;
            break;
          }
        }
        if (tickPrint && !((i + 1) % tickPrint)) {
          putc('.', fp);
        }
      }
      if (tickPrint) {
        putc('\n', fp);
      }
      if (retval != VERIFY_BWTSEQ_NO_ERROR) {
        break;
      }
    } else if (checkFlags & VERIFY_BWTSEQ_SUFVAL) {
      gt_error_set(err, "check of suffix array values was requested,"
                   " but index contains no locate information!");
      retval = VERIFY_BWTSEQ_SUFVAL_ERROR;
      break;
    } else if (BWTSeqHasLocateInformation(bwtSeq)) {
      /* the check was not requested although it would be possible */
      fputs("Not checking suftab values.\n", stderr);
    }

    /* ---- terminator position and backwards regeneration ---- */
    if (BWTSeqHasLocateInformation(bwtSeq)) {
      GtUword nextLocate = BWTSeqTerminatorPos(bwtSeq);

      if (suffixArray.longest.defined &&
          suffixArray.longest.valueunsignedlong != nextLocate) {
        gt_error_set(err, "terminator/0-rotation position mismatch " GT_WU
                     " vs. " GT_WU,
                     suffixArray.longest.valueunsignedlong, nextLocate);
        retval = VERIFY_BWTSEQ_TERMPOS_ERROR;
        break;
      }
      if ((checkFlags & VERIFY_BWTSEQ_LFMAPWALK)
          && (bwtSeq->featureToggles & BWTReversiblySorted)) {
        GtUword i = seqLen;

        /* handle first symbol specially because the encseq
         * will not return the terminator symbol */
        {
          Symbol sym = BWTSeqGetSym(bwtSeq, nextLocate);

          if (sym != UNDEFBWTCHAR) {
            gt_error_set(err, "symbol mismatch at position " GT_WU ": "
                         "%d vs. reference symbol %d", i - 1,
                         (int)sym, (int)UNDEFBWTCHAR);
            retval = VERIFY_BWTSEQ_LFMAPWALK_ERROR;
            break;
          }
          --i;
          nextLocate = BWTSeqLFMap(bwtSeq, nextLocate, &extBits);
        }
        while (i > 0) {
          Symbol symRef, symCmp;

          --i;
          symRef = gt_encseq_get_encoded_char(suffixArray.encseq, i,
                                              suffixArray.readmode);
          symCmp = BWTSeqGetSym(bwtSeq, nextLocate);
          if (symCmp != symRef) {
            gt_error_set(err, "symbol mismatch at position " GT_WU ": "
                         "%d vs. reference symbol %d", i,
                         (int)symCmp, (int)symRef);
            retval = VERIFY_BWTSEQ_LFMAPWALK_ERROR;
            break;
          }
          nextLocate = BWTSeqLFMap(bwtSeq, nextLocate, &extBits);
        }
        if (retval != VERIFY_BWTSEQ_NO_ERROR) {
          break;
        }
      } else if ((checkFlags & VERIFY_BWTSEQ_LFMAPWALK)
                 && !(bwtSeq->featureToggles & BWTReversiblySorted)) {
        gt_error_set(err, "requested complete backwards regeneration in index"
                     " without regeneration capability");
        retval = VERIFY_BWTSEQ_LFMAPWALK_IMP_ERROR;
        break;
      }
    }

    /* ---- context regeneration on random substrings ---- */
    if (checkFlags & VERIFY_BWTSEQ_CONTEXT) {
      BWTSeqContextRetriever *bwtSeqCR =
        gt_BWTSeqCRLoad(bwtSeq, projectName, CTX_MAP_ILOG_AUTOSIZE);

      if (!bwtSeqCR) {
        gt_error_set(err, "cannot load BWT sequence context access table"
                     " for project %s", projectName);
        retval = VERIFY_BWTSEQ_CONTEXT_LOADFAIL;
        break;
      }
      fputs("Checking context regeneration.\n", stderr);
      {
        GtUword i, start, subSeqLen,
          maxSubSeqLen = MIN(MAX(MIN_CONTEXT_LEN, seqLen/CONTEXT_FRACTION),
                             MAX_CONTEXT_LEN),
          numTries = MIN(MAX_NUM_CONTEXT_CHECKS,
                         MAX(2, seqLen/CONTEXT_INTERVAL));
        Symbol *contextBuf = gt_malloc(sizeof (Symbol) * MAX_CONTEXT_LEN);
        GtEncseqReader *esr =
          gt_encseq_create_reader_with_readmode(suffixArray.encseq,
                                                suffixArray.readmode,
                                                0);

        /* A substring can never be longer than the sequence. Without this
           bound seqLen - subSeqLen + 1 below underflows (or becomes 0)
           for short sequences. */
        if (maxSubSeqLen > seqLen) {
          maxSubSeqLen = seqLen;
        }
        for (i = 0; i < numTries && retval == VERIFY_BWTSEQ_NO_ERROR; ++i) {
          GtUword j, end, inSubSeqLen;

          subSeqLen = random()%maxSubSeqLen + 1;
          start = random()%(seqLen - subSeqLen + 1);
          end = start + subSeqLen;
          /* the final position holds the terminator, which is not part of
             the encoded sequence */
          inSubSeqLen = subSeqLen - ((end==seqLen)?1:0);
          gt_BWTSeqCRAccessSubseq(bwtSeqCR, start, subSeqLen, contextBuf);
          if (inSubSeqLen > 0) {
            /* only reposition when something is read: start may equal the
               length of the encoded sequence otherwise */
            gt_encseq_reader_reinit_with_readmode(esr, suffixArray.encseq,
                                                  suffixArray.readmode,
                                                  start);
          }
          for (j = 0; j < inSubSeqLen; ++j) {
            Symbol symRef = gt_encseq_reader_next_encoded_char(esr);
            Symbol symCmp = contextBuf[j];

            if (symCmp != symRef) {
              gt_error_set(err, "symbol mismatch at position " GT_WU ": "
                           "%d vs. reference symbol %d", start + j,
                           (int)symCmp, (int)symRef);
              retval = VERIFY_BWTSEQ_CONTEXT_SYMFAIL;
              break;
            }
          }
          /* Remaining position(s) must hold the terminator. Skipped after
             a mismatch above so that the first error message is kept. */
          while (retval == VERIFY_BWTSEQ_NO_ERROR && j < subSeqLen) {
            Symbol symRef = UNDEFBWTCHAR;
            Symbol symCmp = contextBuf[j];

            if (symCmp != symRef) {
              gt_error_set(err, "symbol mismatch at position " GT_WU ": "
                           "%d vs. reference symbol %d", start + j,
                           (int)symCmp, (int)symRef);
              retval = VERIFY_BWTSEQ_CONTEXT_SYMFAIL;
              break;
            }
            ++j;
          }
        }
        if (retval == VERIFY_BWTSEQ_NO_ERROR) {
          fputs("Context regeneration completed successfully.\n", stderr);
        }
        gt_encseq_reader_delete(esr);
        gt_free(contextBuf);
      }
      gt_deleteBWTSeqCR(bwtSeqCR);
    }
  } while (0);

  if (suffixArrayIsInitialized) {
    gt_freesuffixarray(&suffixArray);
  }
  if (extBitsAreInitialized) {
    destructExtBitsRetrieval(&extBits);
  }
  return retval;
}