Exemplo n.º 1
0
/**
 * Allocates a BWTSeq object in a single chunk that also holds, behind
 * the struct itself, a table of GtUword symbol counts and a table of
 * per-range sort modes, and then initializes the object via
 * initBWTSeqFromEncSeqIdx.
 *
 * @param seqIdx index the new object is built upon, must not be NULL
 * @param alphabet ownership of alphabet is with the newly produced
 * sequence object if return value is non-NULL
 * @param defaultRangeSort passed on unchanged to initBWTSeqFromEncSeqIdx
 * @return the new object, or NULL if initBWTSeqFromEncSeqIdx reported
 * failure (the chunk is released again in that case)
 */
BWTSeq *
gt_newBWTSeq(EISeq *seqIdx, MRAEnc *alphabet,
          const enum rangeSortMode *defaultRangeSort)
{
  BWTSeq *newSeq;
  char *chunk;
  size_t countTableOffset, sortTableOffset, chunkSize;
  unsigned numSyms;

  gt_assert(seqIdx);
  /* one symbol more than the alphabet holds, so that the flattened
   * terminator symbol is handled correctly */
  numSyms = gt_MRAEncGetSize(alphabet) + 1;

  /* layout: [struct BWTSeq | pad | counts | pad | range sort modes] */
  countTableOffset = offsetAlign(sizeof (struct BWTSeq), sizeof (GtUword));
  sortTableOffset = offsetAlign(countTableOffset
                                + sizeof (GtUword) * (numSyms + 1),
                                sizeof (enum rangeSortMode));
  chunkSize = sortTableOffset
    + sizeof (enum rangeSortMode) * MRAEncGetNumRanges(alphabet);

  newSeq = gt_malloc(chunkSize);
  newSeq->pckbuckettable = NULL;
  chunk = (char *)newSeq;
  if (initBWTSeqFromEncSeqIdx(newSeq, seqIdx, alphabet,
                              (GtUword *)(chunk + countTableOffset),
                              (enum rangeSortMode *)(chunk + sortTableOffset),
                              defaultRangeSort))
    return newSeq;

  /* initialization failed: hand nothing back to the caller */
  gt_free(newSeq);
  return NULL;
}
Exemplo n.º 2
0
/**
 * Sets up the locate-related fields of bwtSeq from the headers stored
 * in its underlying index (bwtSeq->seqIdx).
 *
 * If the locate header cannot be read or records a locate interval of
 * zero, a log message is emitted, the sample interval is set to 0 and
 * the feature toggles are reset to BWTBaseFeatures.  Otherwise interval,
 * rot0Pos and feature toggles are taken from the header and the rank
 * sort header is read; when that read fails, bitsPerOrigRank is set to 0
 * and the sort modes are copied from defaultRangeSort.
 *
 * @param bwtSeq object to set up, must not be NULL
 * @param defaultRangeSort one sort mode per alphabet range, used as
 * fallback when no rank sort header can be read
 */
extern void
BWTSeqInitLocateHandling(BWTSeq *bwtSeq,
                         const enum rangeSortMode *defaultRangeSort)
{
  struct encIdxSeq *index;
  struct locateHeader header;

  gt_assert(bwtSeq);
  index = bwtSeq->seqIdx;

  if (!readLocateInfoHeader(index, &header) || !header.locateInterval)
  {
    gt_log_log("Index does not contain locate information.\n"
            "Localization of matches will not be supported!");
    bwtSeq->locateSampleInterval = 0;
    bwtSeq->featureToggles = BWTBaseFeatures;
    return;
  }

  bwtSeq->locateSampleInterval = header.locateInterval;
  bwtSeq->rot0Pos = header.rot0Pos;
  /* FIXME: this really deserves its own header */
  bwtSeq->featureToggles = header.featureToggles;

  if (!readRankSortHeader(index, &bwtSeq->bitsPerOrigRank,
                          bwtSeq->alphabet, bwtSeq->rangeSort))
  {
    /* no usable rank sort header: fall back to the caller's defaults */
    AlphabetRangeID rangeCount = MRAEncGetNumRanges(bwtSeq->alphabet);
    bwtSeq->bitsPerOrigRank = 0;
    memcpy(bwtSeq->rangeSort, defaultRangeSort,
           rangeCount * sizeof (defaultRangeSort[0]));
  }
}
Exemplo n.º 3
0
/**
 * Tells whether a sort mode header is needed: that is the case when at
 * least one range of the alphabet has sort mode SORTMODE_RANK and a
 * specials rank lookup table is present.
 *
 * @param alphabet determines how many entries of rangeSort are inspected
 * @param rangeSort one sort mode per alphabet range
 * @param sprTable may be NULL, in which case the result is 0
 * @return non-zero if the header is needed, 0 otherwise
 */
static inline int
sortModeHeaderNeeded(const MRAEnc *alphabet,
                     const enum rangeSortMode *rangeSort,
                     const SpecialsRankLookup *sprTable)
{
  AlphabetRangeID range, rangeCount = MRAEncGetNumRanges(alphabet);
  bool anyRankSorted = false;

  for (range = 0; range < rangeCount; ++range)
  {
    if (rangeSort[range] == SORTMODE_RANK)
      anyRankSorted = true;
  }
  return (anyRankSorted && sprTable) ? 1 : 0;
}
Exemplo n.º 4
0
/**
 * Derives a new uint8_t-sourced alphabet from srcAlpha.
 *
 * Symbols of every range whose rangeSel entry equals selection are
 * mapped to consecutive codes starting at 0 and the range keeps its
 * symbol count; symbols of all other ranges are mapped to fallback and
 * the range is given size 0.  Table entries not covered by any range
 * stay at UNDEF_UCHAR.
 *
 * Aborts the program if srcAlpha is not of encoding type sourceUInt8.
 *
 * @param srcAlpha alphabet to derive from
 * @param selection value a rangeSel entry must have for its range to
 * be kept
 * @param rangeSel one entry per range of srcAlpha
 * @param fallback code assigned to symbols of unselected ranges
 * @return whatever gt_newMultiRangeAlphabetEncodingUInt8 produces for
 * the computed ranges and mapping
 */
MRAEnc *
gt_MRAEncSecondaryMapping(const MRAEnc *srcAlpha, int selection,
                       const int *rangeSel, Symbol fallback)
{
  MRAEnc *mappedAlpha;
  GT_UNUSED const MRAEncUInt8 *ui8alpha;
  uint8_t *symMap, nextCode = 0;
  AlphabetRangeSize *rangeSizes, srcSym = 0;
  AlphabetRangeID range, rangeCount;

  /* only uint8_t-sourced alphabets are supported */
  if (srcAlpha->encType != sourceUInt8)
    abort();

  rangeCount = MRAEncGetNumRanges(srcAlpha);
  ui8alpha = constMRAEnc2MRAEncUInt8(srcAlpha);
  symMap = gt_malloc(sizeof (uint8_t) * (UINT8_MAX + 1));
  memset(symMap, UNDEF_UCHAR, UINT8_MAX+1);
  rangeSizes = gt_malloc(sizeof (rangeSizes[0]) * rangeCount);

  for (range = 0; range < rangeCount; ++range)
  {
    const int isSelected = (rangeSel[range] == selection);

    /* srcSym carries over between ranges, so each range continues
     * where the previous one ended */
    for (; srcSym < srcAlpha->rangeEndIndices[range]; ++srcSym)
    {
      if (isSelected)
        symMap[srcSym] = nextCode++;
      else
        symMap[srcSym] = fallback;
    }
    if (isSelected)
      rangeSizes[range] = srcAlpha->symbolsPerRange[range];
    else
      rangeSizes[range] = 0;
  }

  mappedAlpha = gt_newMultiRangeAlphabetEncodingUInt8(rangeCount, rangeSizes,
                                                   symMap);
  /* the temporary tables are released once the new alphabet is built */
  gt_free(symMap);
  gt_free(rangeSizes);
  return mappedAlpha;
}
Exemplo n.º 5
0
static int
writeRankSortHeader(FILE *fp, void *cbData)
{
  struct sortModeHeader *headerData = cbData;
  gt_assert(cbData);
  if (fwrite(&headerData->bitsPerOrigRank, sizeof (headerData->bitsPerOrigRank),
             1, fp) != 1)
    return 0;
  {
    size_t i, numRanges = MRAEncGetNumRanges(headerData->alphabet);
    for (i = 0; i < numRanges; ++i)
    {
      int16_t mode = headerData->rangeSort[i];
      if (fwrite(&mode, sizeof (mode), 1, fp) != 1)
        return 0;
    }
  }
  return 1;
}
Exemplo n.º 6
0
/**
 * Reads the rank sort header of seqIdx: first the bitsPerOrigRank
 * value, then one 16 bit sort mode value per alphabet range.
 *
 * On failure, *bitsPerOrigRank and a leading part of rangeSort may
 * already have been overwritten.
 *
 * NOTE(review): writeRankSortHeader stores each mode as int16_t while
 * this function reads it as uint16_t; the width is the same, so values
 * that are non-negative round-trip unchanged.
 *
 * @param seqIdx index to read the header from, must not be NULL
 * @param bitsPerOrigRank receives the stored value, must not be NULL
 * @param alphabet determines how many modes are read, must not be NULL
 * @param rangeSort receives one mode per range, must not be NULL
 * @return 1 on success, 0 if the header is not found or a read fails
 */
static inline int
readRankSortHeader(EISeq *seqIdx, uint32_t *bitsPerOrigRank,
                   const MRAEnc *alphabet,
                   enum rangeSortMode *rangeSort)
{
  FILE *headerFp;
  size_t range, rangeCount;

  gt_assert(seqIdx && alphabet && bitsPerOrigRank && rangeSort);
  headerFp = EISSeekToHeader(seqIdx, RANK_SORT_HEADERID, NULL);
  if (!headerFp)
    return 0;
  if (fread(bitsPerOrigRank, sizeof (*bitsPerOrigRank), 1, headerFp) != 1)
    return 0;

  rangeCount = MRAEncGetNumRanges(alphabet);
  for (range = 0; range < rangeCount; ++range)
  {
    uint16_t storedMode;
    if (fread(&storedMode, sizeof (storedMode), 1, headerFp) != 1)
      return 0;
    rangeSort[range] = storedMode;
  }
  return 1;
}
Exemplo n.º 7
0
/**
 * For every symbol of every alphabet range, computes the bounds
 * obtained from the symbol's count plus the two occurrence values that
 * BWTSeqPosPairRangeOcc delivers for the position pair (lbound, ubound),
 * and stores them in mbtab, indexed by symbol.
 *
 * For a range of size s the code reads rangeOccs[0..s-1] as the lower
 * and rangeOccs[s..2s-1] as the upper occurrence values (presumably
 * those belonging to lbound and ubound respectively -- confirm against
 * BWTSeqPosPairRangeOcc).  If the lower value is not smaller than the
 * upper one, both bounds of that symbol are set to 0.
 *
 * @param mbtab receives lowerbound/upperbound for each symbol
 * @param rangeOccs scratch space filled anew for every range
 * @param voidBwtSeq actually a const BWTSeq *
 * @param lbound first position of the pair
 * @param ubound second position of the pair
 * @return the sum of all range sizes
 */
GtUword gt_bwtrangesplitallwithspecial(Mbtab *mbtab,
                                             GtUword *rangeOccs,
                                             const FMindex *voidBwtSeq,
                                             GtUword lbound,
                                             GtUword ubound)
{
  const BWTSeq *bwtseq = (const BWTSeq *) voidBwtSeq;
  const MRAEnc *alphabet = BWTSeqGetAlphabet(bwtseq);
  AlphabetRangeID numofranges = MRAEncGetNumRanges(alphabet);
  AlphabetRangeSize rangesize = 0, totalrange = 0;
  GtUword range_idx;

  for (range_idx = 0; range_idx < (GtUword) numofranges; range_idx++)
  {
    GtUword char_idx, rangebase, rangeend;

    rangesize = MRAEncGetRangeSize(alphabet, range_idx);
    totalrange += rangesize;
    BWTSeqPosPairRangeOcc(bwtseq, range_idx, lbound, ubound,rangeOccs);
    rangebase = (GtUword) MRAEncGetRangeBase(alphabet, range_idx);
    rangeend = rangebase + rangesize;

    for (char_idx = rangebase; char_idx < rangeend; char_idx++)
    {
      /* position of this symbol within its range */
      const GtUword occ_idx = char_idx - rangebase;
      const GtUword lowerocc = rangeOccs[occ_idx],
                    upperocc = rangeOccs[rangesize+occ_idx];

      if (lowerocc < upperocc)
      {
        mbtab[char_idx].lowerbound = bwtseq->count[char_idx] + lowerocc;
        mbtab[char_idx].upperbound = bwtseq->count[char_idx] + upperocc;
      }
      else
      {
        /* no occurrence of this symbol between the two positions */
        mbtab[char_idx].upperbound = 0;
        mbtab[char_idx].lowerbound = 0;
      }
    }
  }
  return totalrange;
}
Exemplo n.º 8
0
/**
 * Computes the size in bytes of the sort mode header: one uint32_t
 * followed by one int16_t per range of the alphabet.
 *
 * @param alphabet alphabet whose number of ranges determines the size
 * @return header size in bytes
 */
static inline uint32_t
computeSortModeHeaderSize(const MRAEnc *alphabet)
{
  const size_t modeTableSize =
    sizeof (int16_t) * MRAEncGetNumRanges(alphabet);
  return sizeof (uint32_t) + modeTableSize;
}