/**
 * Creates a BWTSeq object on top of an encoded index sequence.
 *
 * The object, its symbol count table and its per-range sort mode table
 * are carved out of one single allocation, so releasing the returned
 * pointer releases all three.
 *
 * @param seqIdx encoded index sequence to wrap, must not be NULL
 * @param alphabet ownership of alphabet is with the newly produced
 * sequence object if return value is non-NULL
 * @param defaultRangeSort passed on unchanged to initBWTSeqFromEncSeqIdx
 * @return the new object, or NULL if initBWTSeqFromEncSeqIdx reported
 * failure (the allocation is freed in that case)
 */
BWTSeq *
gt_newBWTSeq(EISeq *seqIdx, MRAEnc *alphabet,
             const enum rangeSortMode *defaultRangeSort)
{
  BWTSeq *newSeq;
  size_t countsStart, countsBytes, sortModesStart, sortModesBytes;
  unsigned numSyms;

  gt_assert(seqIdx);
  /* one extra symbol so that the flattened terminator symbol is
   * handled correctly */
  numSyms = gt_MRAEncGetSize(alphabet) + 1;

  /* layout: [struct BWTSeq | padding | counts | padding | sort modes] */
  countsStart = offsetAlign(sizeof (struct BWTSeq), sizeof (GtUword));
  countsBytes = sizeof (GtUword) * (numSyms + 1);
  sortModesStart = offsetAlign(countsStart + countsBytes,
                               sizeof (enum rangeSortMode));
  sortModesBytes = sizeof (enum rangeSortMode)
    * MRAEncGetNumRanges(alphabet);

  newSeq = gt_malloc(sortModesStart + sortModesBytes);
  newSeq->pckbuckettable = NULL;

  if (initBWTSeqFromEncSeqIdx(newSeq, seqIdx, alphabet,
                              (GtUword *)((char *)newSeq + countsStart),
                              (enum rangeSortMode *)((char *)newSeq
                                                     + sortModesStart),
                              defaultRangeSort))
    return newSeq;

  gt_free(newSeq);
  return NULL;
}
/**
 * Sets up the locate-related fields of bwtSeq from the headers stored
 * in its underlying index.
 *
 * If no locate header can be read, or its locate interval is zero,
 * locate support is switched off (sample interval 0, base feature set).
 * Otherwise the sample interval, rot0Pos and feature toggles are taken
 * from the header; the rank sort information is read from its own
 * header and, if that fails, bitsPerOrigRank is zeroed and the range
 * sort modes are copied from defaultRangeSort.
 *
 * @param bwtSeq object to initialize, must not be NULL
 * @param defaultRangeSort fallback sort modes, one entry per alphabet
 * range; only read when the rank sort header cannot be read
 */
extern void
BWTSeqInitLocateHandling(BWTSeq *bwtSeq,
                         const enum rangeSortMode *defaultRangeSort)
{
  struct encIdxSeq *index;
  struct locateHeader header;

  gt_assert(bwtSeq);
  index = bwtSeq->seqIdx;

  if (!readLocateInfoHeader(index, &header) || !header.locateInterval)
  {
    gt_log_log("Index does not contain locate information.\n"
               "Localization of matches will not be supported!");
    bwtSeq->locateSampleInterval = 0;
    bwtSeq->featureToggles = BWTBaseFeatures;
    return;
  }

  bwtSeq->locateSampleInterval = header.locateInterval;
  bwtSeq->rot0Pos = header.rot0Pos;
  /* FIXME: this really deserves its own header */
  bwtSeq->featureToggles = header.featureToggles;

  if (!readRankSortHeader(index, &bwtSeq->bitsPerOrigRank,
                          bwtSeq->alphabet, bwtSeq->rangeSort))
  {
    /* no usable rank sort header: fall back to the caller's defaults */
    AlphabetRangeID rangeCount = MRAEncGetNumRanges(bwtSeq->alphabet);
    bwtSeq->bitsPerOrigRank = 0;
    memcpy(bwtSeq->rangeSort, defaultRangeSort,
           rangeCount * sizeof (defaultRangeSort[0]));
  }
}
/**
 * Decides whether a sort mode header has to be stored.
 *
 * @param alphabet determines how many entries of rangeSort are inspected
 * @param rangeSort sort mode of every alphabet range
 * @param sprTable special rank lookup table, may be NULL
 * @return non-zero iff at least one range uses SORTMODE_RANK and
 * sprTable is non-NULL, 0 otherwise
 */
static inline int
sortModeHeaderNeeded(const MRAEnc *alphabet,
                     const enum rangeSortMode *rangeSort,
                     const SpecialsRankLookup *sprTable)
{
  AlphabetRangeID range = 0, rangeCount = MRAEncGetNumRanges(alphabet);
  bool foundRankSorted = false;

  /* one rank-sorted range is enough, stop at the first hit */
  while (!foundRankSorted && range < rangeCount)
  {
    if (rangeSort[range] == SORTMODE_RANK)
      foundRankSorted = true;
    ++range;
  }
  return foundRankSorted && sprTable != NULL;
}
/**
 * Builds a new alphabet from srcAlpha in which only the ranges picked
 * by rangeSel keep their symbols.
 *
 * Ranges are walked in order. For every range with
 * rangeSel[range] == selection the mapping entries up to that range's
 * end index receive consecutive codes (counting on from 0 across all
 * selected ranges) and the range keeps its symbol count. For every
 * other range the entries are set to fallback and the range size
 * becomes 0. Mapping entries not reached by this walk stay at
 * UNDEF_UCHAR.
 *
 * Only sourceUInt8 alphabets are supported; any other encoding type
 * aborts the program.
 *
 * @param srcAlpha alphabet to derive the new one from
 * @param selection value in rangeSel that marks a range as selected
 * @param rangeSel one entry per range of srcAlpha
 * @param fallback code assigned to entries of unselected ranges
 * @return result of gt_newMultiRangeAlphabetEncodingUInt8 for the
 * computed ranges and mapping
 */
MRAEnc *
gt_MRAEncSecondaryMapping(const MRAEnc *srcAlpha, int selection,
                          const int *rangeSel, Symbol fallback)
{
  MRAEnc *derived;
  GT_UNUSED const MRAEncUInt8 *ui8alpha;
  uint8_t *codeMap, nextCode = 0;
  AlphabetRangeSize *rangeSizes, pos = 0;
  AlphabetRangeID r, rangeCount;

  if (srcAlpha->encType != sourceUInt8)
    abort();

  rangeCount = MRAEncGetNumRanges(srcAlpha);
  ui8alpha = constMRAEnc2MRAEncUInt8(srcAlpha);

  codeMap = gt_malloc(sizeof (uint8_t) * (UINT8_MAX + 1));
  memset(codeMap, UNDEF_UCHAR, UINT8_MAX+1);
  rangeSizes = gt_malloc(sizeof (rangeSizes[0]) * rangeCount);

  for (r = 0; r < rangeCount; ++r)
  {
    if (rangeSel[r] == selection)
    {
      /* selected range: hand out the next free codes */
      while (pos < srcAlpha->rangeEndIndices[r])
      {
        codeMap[pos] = nextCode++;
        ++pos;
      }
      rangeSizes[r] = srcAlpha->symbolsPerRange[r];
    }
    else
    {
      /* unselected range: collapse everything onto fallback */
      while (pos < srcAlpha->rangeEndIndices[r])
      {
        codeMap[pos] = fallback;
        ++pos;
      }
      rangeSizes[r] = 0;
    }
  }

  derived = gt_newMultiRangeAlphabetEncodingUInt8(rangeCount, rangeSizes,
                                                  codeMap);
  gt_free(codeMap);
  gt_free(rangeSizes);
  return derived;
}
/**
 * Header writer callback: stores the rank sort information.
 *
 * Writes bitsPerOrigRank in its in-memory representation, followed by
 * one int16_t per alphabet range holding that range's sort mode.
 *
 * @param fp stream to write to
 * @param cbData pointer to a struct sortModeHeader, must not be NULL
 * @return 1 if every item was written, 0 as soon as one fwrite fails
 */
static int
writeRankSortHeader(FILE *fp, void *cbData)
{
  struct sortModeHeader *hdr = cbData;
  size_t range, rangeCount;

  gt_assert(cbData);
  if (fwrite(&hdr->bitsPerOrigRank, sizeof (hdr->bitsPerOrigRank), 1, fp)
      != 1)
    return 0;

  rangeCount = MRAEncGetNumRanges(hdr->alphabet);
  for (range = 0; range < rangeCount; ++range)
  {
    /* each mode is narrowed to 16 bits for storage */
    int16_t storedMode = hdr->rangeSort[range];
    if (fwrite(&storedMode, sizeof (storedMode), 1, fp) != 1)
      return 0;
  }
  return 1;
}
/**
 * Reads the rank sort header of an index.
 *
 * Seeks to the header identified by RANK_SORT_HEADERID, reads
 * bitsPerOrigRank and then one 16 bit sort mode per alphabet range.
 * The modes are read as int16_t, the same type writeRankSortHeader
 * stores and computeSortModeHeaderSize accounts for, so that stored
 * values are interpreted with the signedness they were written with.
 *
 * @param seqIdx index to read the header from, must not be NULL
 * @param bitsPerOrigRank receives the stored value, must not be NULL
 * @param alphabet determines the number of sort modes to read,
 * must not be NULL
 * @param rangeSort receives one sort mode per range, must not be NULL;
 * may be partially overwritten when reading fails midway
 * @return 1 on success, 0 if the header is missing or a read fails
 */
static inline int
readRankSortHeader(EISeq *seqIdx, uint32_t *bitsPerOrigRank,
                   const MRAEnc *alphabet, enum rangeSortMode *rangeSort)
{
  FILE *fp;
  gt_assert(seqIdx && alphabet && bitsPerOrigRank && rangeSort);
  if (!(fp = EISSeekToHeader(seqIdx, RANK_SORT_HEADERID, NULL)))
    return 0;
  if (fread(bitsPerOrigRank, sizeof (*bitsPerOrigRank), 1, fp) != 1)
    return 0;
  {
    int16_t mode; /* must match the type used by writeRankSortHeader */
    size_t i, numRanges = MRAEncGetNumRanges(alphabet);
    for (i = 0; i < numRanges; ++i)
    {
      if (fread(&mode, sizeof (mode), 1, fp) != 1)
        return 0;
      rangeSort[i] = (enum rangeSortMode)mode;
    }
  }
  return 1;
}
/**
 * Splits the interval [lbound, ubound) by every symbol of every
 * alphabet range and stores the resulting bounds in mbtab.
 *
 * For each range BWTSeqPosPairRangeOcc fills rangeOccs; entry k is
 * compared with entry rangesize + k for the k-th symbol of the range.
 * If the first is smaller, the symbol's bounds become its count plus
 * the respective entry, otherwise both bounds are set to 0.
 *
 * @param mbtab output table, indexed by symbol code
 * @param rangeOccs scratch buffer, overwritten for every range
 * @param voidBwtSeq the BWT sequence object, passed as FMindex
 * @param lbound first position argument handed to BWTSeqPosPairRangeOcc
 * @param ubound second position argument handed to BWTSeqPosPairRangeOcc
 * @return sum of the sizes of all ranges
 */
GtUword
gt_bwtrangesplitallwithspecial(Mbtab *mbtab, GtUword *rangeOccs,
                               const FMindex *voidBwtSeq,
                               GtUword lbound, GtUword ubound)
{
  const BWTSeq *bwtseq = (const BWTSeq *) voidBwtSeq;
  const MRAEnc *alphabet = BWTSeqGetAlphabet(bwtseq);
  AlphabetRangeID numofranges = MRAEncGetNumRanges(alphabet);
  AlphabetRangeSize rangesize = 0, symbolsSeen = 0;
  GtUword range_idx;

  for (range_idx = 0; range_idx < (GtUword) numofranges; range_idx++)
  {
    GtUword firstSym, endSym, sym;

    rangesize = MRAEncGetRangeSize(alphabet, range_idx);
    symbolsSeen += rangesize;
    BWTSeqPosPairRangeOcc(bwtseq, range_idx, lbound, ubound,rangeOccs);
    firstSym = (GtUword) MRAEncGetRangeBase(alphabet, range_idx);
    endSym = firstSym + rangesize;

    for (sym = firstSym; sym < endSym; sym++)
    {
      /* position of this symbol inside the current range */
      const GtUword inRange = sym - firstSym;
      const GtUword occLower = rangeOccs[inRange],
                    occUpper = rangeOccs[rangesize + inRange];

      if (occLower < occUpper)
      {
        mbtab[sym].lowerbound = bwtseq->count[sym] + occLower;
        mbtab[sym].upperbound = bwtseq->count[sym] + occUpper;
      }
      else
      {
        /* symbol does not occur in the interval */
        mbtab[sym].upperbound = 0;
        mbtab[sym].lowerbound = 0;
      }
    }
  }
  return symbolsSeen;
}
/**
 * Computes the size in bytes of the sort mode header: one uint32_t
 * followed by one int16_t per alphabet range.
 *
 * @param alphabet alphabet whose number of ranges determines the size
 * @return header size in bytes
 */
static inline uint32_t
computeSortModeHeaderSize(const MRAEnc *alphabet)
{
  const size_t fixedPart = sizeof (uint32_t);
  const size_t perRangePart = sizeof (int16_t)
    * MRAEncGetNumRanges(alphabet);
  return fixedPart + perRangePart;
}