Ejemplo n.º 1
0
/*
  Creates an encoded index sequence for the suffix array project projectName.

  The project is opened with streamsuffixarray(), requesting the suffix table
  and the BWT table. The index is built by createEncIdxSeqFromSA() for a
  length of (total length of the encoded sequence + 1); all remaining
  arguments are forwarded unchanged. The suffix array is released again
  before returning.

  Returns the result of createEncIdxSeqFromSA(), or NULL if the suffix array
  could not be opened.
*/
extern EISeq *
createEncIdxSeq(const GtStr *projectName,
                const struct seqBaseParam *params,
                size_t numExtHeaders, const uint16_t *headerIDs,
                const uint32_t *extHeaderSizes,
                headerWriteFunc *extHeaderCallbacks, void **headerCBData,
                bitInsertFunc biFunc, BitOffset cwExtBitsPerPos,
                varExtBitsEstimator biVarBits, void *cbState,
                Verboseinfo *verbosity, GtError *err)
{
  Suffixarray sa;
  struct encIdxSeq *result;
  Seqpos seqLen;

  gt_assert(projectName);
  /* open the index project; nothing to clean up here if that fails */
  if (streamsuffixarray(&sa, SARR_SUFTAB | SARR_BWTTAB, projectName,
                        verbosity, err) != 0)
  {
    return NULL;
  }
  seqLen = getencseqtotallength(sa.encseq) + 1;
  result = createEncIdxSeqFromSA(&sa, seqLen, projectName, params,
                                 numExtHeaders, headerIDs, extHeaderSizes,
                                 extHeaderCallbacks, headerCBData, biFunc,
                                 cwExtBitsPerPos, biVarBits, cbState, err);
  freesuffixarray(&sa);
  return result;
}
Ejemplo n.º 2
0
/*
  Runs multicharactercompare_withtest() on encseq for the given readmode.

  First the four combinations of the first position (0) and the last
  position (totallength-1) are compared, then multicharcmptrials pairs of
  positions drawn with drand48(). The generator is seeded with a fixed
  value before the comparisons start.
*/
static void testmulticharactercompare(const Encodedsequence *encseq,
                                      Readmode readmode,
                                      unsigned long multicharcmptrials)
{
  Encodedsequencescanstate *esr1, *esr2;
  Seqpos totallength, lastpos;
  unsigned long trial;
  unsigned int corner;
  const bool fwd = ISDIRREVERSE(readmode) ? false : true;
  const bool complement = ISDIRCOMPLEMENT(readmode) ? true : false;

  esr1 = newEncodedsequencescanstate();
  esr2 = newEncodedsequencescanstate();
  totallength = getencseqtotallength(encseq);
  lastpos = totallength - 1;
  srand48(42349421);
  /* corner = 0..3 enumerates (0,0), (0,last), (last,0), (last,last) */
  for (corner = 0; corner < 4U; corner++)
  {
    const Seqpos first = (corner & 2U) ? lastpos : 0;
    const Seqpos second = (corner & 1U) ? lastpos : 0;

    (void) multicharactercompare_withtest(encseq,fwd,complement,
                                          esr1,first,esr2,second);
  }
  for (trial = 0; trial < multicharcmptrials; trial++)
  {
    /* two separate statements: the first draw always belongs to esr1 */
    const Seqpos rnd1 = (Seqpos) (drand48() * (double) totallength);
    const Seqpos rnd2 = (Seqpos) (drand48() * (double) totallength);

    (void) multicharactercompare_withtest(encseq,fwd,complement,
                                          esr1,rnd1,esr2,rnd2);
  }
  freeEncodedsequencescanstate(&esr1);
  freeEncodedsequencescanstate(&esr2);
}
Ejemplo n.º 3
0
/*
  Compares random access against sequential reading of encseq.

  esr is initialised at startpos for the given readmode. For every position
  from startpos up to (but excluding) the total length, the character
  delivered by getencodedchar() must equal the one delivered by
  sequentialgetencodedchar(). On the first difference a diagnostic is
  written to stderr and the process exits with GT_EXIT_PROGRAMMING_ERROR.
*/
static void runscanatpostrial(const Encodedsequence *encseq,
                              Encodedsequencescanstate *esr,
                              Readmode readmode,Seqpos startpos)
{
  const Seqpos totallength = getencseqtotallength(encseq);
  Seqpos pos = startpos;

  initEncodedsequencescanstate(esr,encseq,readmode,startpos);
  while (pos < totallength)
  {
    /* random access first, then the sequential reader */
    const GtUchar ccra = getencodedchar(encseq,pos,readmode);
    const GtUchar ccsr = sequentialgetencodedchar(encseq,esr,pos,readmode);

    if (ccra != ccsr)
    {
      fprintf(stderr,"startpos = " FormatSeqpos
                     " access=%s, mode=%s: position=" FormatSeqpos
                     ": random access (correct) = %u != %u = "
                     " sequential read (wrong)\n",
                     PRINTSeqposcast(startpos),
                     encseqaccessname(encseq),
                     showreadmode(readmode),
                     PRINTSeqposcast(pos),
                     (unsigned int) ccra,
                     (unsigned int) ccsr);
      exit(GT_EXIT_PROGRAMMING_ERROR);
    }
    pos++;
  }
}
Ejemplo n.º 4
0
/*
  Counts, per alphabet character, how often that character is the neighbour
  of a special range in encseq.

  For a reverse readmode the character at range.rightpos is inspected (read
  in Forwardmode for Reversemode, otherwise in Complementmode) provided
  rightpos lies inside the sequence. For the other readmodes the character
  at range.leftpos-1 is inspected provided leftpos > 0. Only non-special
  characters are counted. If the sequence has no special suffix, the
  character at the last position is counted as well.

  Returns a table of numofchars counters allocated with gt_malloc(); the
  table is handed to the caller.
*/
static Seqpos *leftcontextofspecialchardist(unsigned int numofchars,
                                            const Encodedsequence *encseq,
                                            Readmode readmode)
{
  const Seqpos totallength = getencseqtotallength(encseq);
  Seqpos *dist;
  GtUchar neighbour;
  unsigned int charidx = 0;

  dist = gt_malloc(sizeof(*dist) * numofchars);
  while (charidx < numofchars)
  {
    dist[charidx++] = 0;
  }
  if (hasspecialranges(encseq))
  {
    Sequencerange range;
    Specialrangeiterator *sri = newspecialrangeiterator(encseq,true);

    if (!ISDIRREVERSE(readmode))
    {
      while (nextspecialrangeiterator(&range,sri))
      {
        gt_assert(range.leftpos < totallength);
        if (range.leftpos > 0)
        {
          neighbour = getencodedchar(encseq,range.leftpos-1,readmode);
          if (ISNOTSPECIAL(neighbour))
          {
            dist[neighbour]++;
          }
        }
      }
    } else
    {
      Readmode thismode;

      if (readmode == Reversemode)
      {
        thismode = Forwardmode;
      } else
      {
        thismode = Complementmode;
      }
      while (nextspecialrangeiterator(&range,sri))
      {
        if (range.rightpos < totallength)
        {
          neighbour = getencodedchar(encseq,range.rightpos,thismode);
          if (ISNOTSPECIAL(neighbour))
          {
            dist[neighbour]++;
          }
        }
      }
    }
    freespecialrangeiterator(&sri);
  }
  if (getencseqlengthofspecialsuffix(encseq) == 0)
  {
    /* no special suffix: the last character is counted too */
    neighbour = getencodedchar(encseq,totallength-1,readmode);
    gt_assert(ISNOTSPECIAL(neighbour));
    dist[neighbour]++;
  }
  return dist;
}
Ejemplo n.º 5
0
/*
  Test driver for the merger trie.

  Opens the project indexname (encoded sequence only), builds a trie over
  the sequence with maketrie() and then either checks it (onlyins; only
  compiled in with WITHTRIEIDENT) or runs successivelydeletesmallest().
  freesuffixarray() is called on every path.

  Returns 0 on success and -1 if the suffix array could not be opened.
*/
int test_trieins(bool onlyins,const GtStr *indexname,GtError *err)
{
  Suffixarray suffixarray;
  int status = -1;

  gt_error_check(err);
  if (streamsuffixarray(&suffixarray,
                        SARR_ESQTAB,
                        indexname,
                        NULL,
                        err) == 0)
  {
    Mergertrierep trierep;
    const GtUchar *characters;
    const Seqpos totallength = getencseqtotallength(suffixarray.encseq);

    ALLOCASSIGNSPACE(trierep.encseqreadinfo,NULL,Encseqreadinfo,1);
    trierep.encseqreadinfo[0].encseqptr = suffixarray.encseq;
    trierep.encseqreadinfo[0].readmode = suffixarray.readmode;
    characters = getencseqAlphabetcharacters(suffixarray.encseq);
    mergertrie_initnodetable(&trierep,totallength,1U);
    maketrie(&trierep,characters,totallength);
    if (!onlyins)
    {
#ifdef WITHTRIEIDENT
#ifdef WITHTRIESHOW
      showallnoderelations(trierep.root);
#endif
#endif
      successivelydeletesmallest(&trierep,totallength,characters,err);
    } else
    {
#ifdef WITHTRIEIDENT
#ifdef WITHTRIESHOW
      showtrie(&trierep,characters);
#endif
      checktrie(&trierep,totallength+1,totallength,err);
#endif
    }
    mergertrie_delete(&trierep);
    status = 0;
  }
  freesuffixarray(&suffixarray);
  return status;
}
Ejemplo n.º 6
0
/*
  Loads an existing encoded index sequence for the project projectName.

  The project is opened with streamsuffixarray() without requesting any
  table. loadEncIdxSeqForSA() is then called for a length of (total length
  of the encoded sequence + 1), and the suffix array is released again.

  Returns the loaded index, or NULL if the suffix array could not be opened
  or loadEncIdxSeqForSA() returned NULL.
*/
extern EISeq *
loadEncIdxSeq(const GtStr *projectName,
              enum seqBaseEncoding encType, int features,
              Verboseinfo *verbosity, GtError *err)
{
  Suffixarray sa;
  struct encIdxSeq *loaded = NULL;

  if (!streamsuffixarray(&sa, 0, projectName, verbosity, err))
  {
    const Seqpos seqLen = getencseqtotallength(sa.encseq) + 1;

    loaded = loadEncIdxSeqForSA(&sa, seqLen, projectName,
                                encType, features, err);
    freesuffixarray(&sa);
  }
  return loaded;
}
Ejemplo n.º 7
0
/*
  Runs runscanatpostrial() from position 0, from the last position
  (totallength-1) and then from scantrials start positions drawn with
  drand48(). The generator is seeded with a fixed value. Each random trial
  is announced on stdout before it runs.
*/
static void testscanatpos(const Encodedsequence *encseq,
                          Readmode readmode,
                          unsigned long scantrials)
{
  Encodedsequencescanstate *scanstate = NULL;
  const Seqpos totallength = getencseqtotallength(encseq);
  unsigned long trial = 0;

  srand48(42349421);
  scanstate = newEncodedsequencescanstate();
  /* the two boundary start positions first */
  runscanatpostrial(encseq,scanstate,readmode,0);
  runscanatpostrial(encseq,scanstate,readmode,totallength-1);
  while (trial < scantrials)
  {
    const Seqpos startpos = (Seqpos) (drand48() * (double) totallength);

    printf("trial %lu at " FormatSeqpos "\n",trial,PRINTSeqposcast(startpos));
    runscanatpostrial(encseq,scanstate,readmode,startpos);
    trial++;
  }
  freeEncodedsequencescanstate(&scanstate);
}
Ejemplo n.º 8
0
/*
  Extends a match of the query qstart..qend-1 in the suffix array
  genericindex, starting with the interval [left,right] at depth offset.

  While the interval still satisfies left < right, the next query character
  is used to narrow it with lcpintervalfindcharchildintv(). As soon as the
  interval no longer satisfies left < right, the current offset is returned.
  If the query is exhausted, a special character is met or no child interval
  exists before that happens, 0 is returned. witnessposition is not used.
*/
unsigned long suffixarrayuniqueforward (const void *genericindex,
                                        unsigned long offset,
                                        Seqpos left,
                                        Seqpos right,
                                        GT_UNUSED Seqpos *witnessposition,
                                        const GtUchar *qstart,
                                        const GtUchar *qend)
{
    const Suffixarray *sa = (const Suffixarray *) genericindex;
    const GtUchar *cursor = qstart;
    Simplelcpinterval itv;
    Seqpos totallength;

    itv.left = left;
    itv.right = right;
    totallength = getencseqtotallength(sa->encseq);
    while (itv.left < itv.right)
    {
        if (cursor >= qend)
        {
            return 0;
        }
        if (ISSPECIAL(*cursor))
        {
            return 0;
        }
        if (!lcpintervalfindcharchildintv(sa->encseq,
                                          sa->readmode,
                                          totallength,
                                          sa->suftab,
                                          &itv,
                                          *cursor,
                                          (Seqpos) offset,
                                          itv.left,
                                          itv.right))
        {
            return 0;
        }
        cursor++;
        offset++;
    }
    return offset;
}
Ejemplo n.º 9
0
/*
  Extends a match of the query qstart..qend-1 in the suffix array
  genericindex as far as possible, starting with the interval [left,right]
  at depth offset.

  The interval is narrowed character by character with
  lcpintervalfindcharchildintv() until the query is exhausted, a special
  character is met or no child interval exists. If witnessposition is not
  NULL, it receives the suffix table entry at the left border of the last
  interval. Returns the offset reached.
*/
unsigned long suffixarraymstats (const void *genericindex,
                                 unsigned long offset,
                                 Seqpos left,
                                 Seqpos right,
                                 Seqpos *witnessposition,
                                 const GtUchar *qstart,
                                 const GtUchar *qend)
{
    const Suffixarray *sa = (const Suffixarray *) genericindex;
    const GtUchar *cursor = qstart;
    Simplelcpinterval itv;
    Seqpos totallength;

    itv.left = left;
    itv.right = right;
    totallength = getencseqtotallength(sa->encseq);
    for (;;)
    {
        gt_assert(itv.left <= itv.right);
        if (cursor >= qend || ISSPECIAL(*cursor))
        {
            break;
        }
        if (!lcpintervalfindcharchildintv(sa->encseq,
                                          sa->readmode,
                                          totallength,
                                          sa->suftab,
                                          &itv,
                                          *cursor,
                                          (Seqpos) offset,
                                          itv.left,itv.right))
        {
            break;
        }
        cursor++;
        offset++;
    }
    /* the loop is only ever left at the point where extension stopped */
    if (witnessposition != NULL)
    {
        *witnessposition = sa->suftab[itv.left];
    }
    return offset;
}
Ejemplo n.º 10
0
/*
  Scans encseq forward (Forwardmode) from startpos and reports the length of
  the scanned prefix at which the scan stops.

  eqsvector is initialised from the pattern useq of length ulen via
  initeqsvector(). maxdistance must be greater than 0.

  The scan stops at the first position where distval <= maxdistance holds
  after COMPUTENEWDIST, or at the last position of the sequence
  (totallength-1), whichever comes first. The result is then defined and
  holds the number of positions read (pos - startpos + 1).

  If nowildcards is set and a WILDCARD character is read, the result is
  returned as undefined with value 0.

  NOTE(review): distval is not declared in this function; presumably it is
  introduced by DECLARELOCALVARS and updated by COMPUTENEWDIST - confirm
  against the macro definitions.
  NOTE(review): a stop at the last sequence position is reported as a
  defined result as well, regardless of distval - confirm this is intended.
*/
Definedunsignedlong forwardprefixmatch(const Encodedsequence *encseq,
                                       unsigned int alphasize,
                                       Seqpos startpos,
                                       bool nowildcards,
                                       unsigned long *eqsvector,
                                       const GtUchar *useq,
                                       unsigned long ulen,
                                       unsigned long maxdistance)
{
  DECLARELOCALVARS;
  Seqpos pos, totallength = getencseqtotallength(encseq);
  GtUchar cc;
  Definedunsignedlong result;

  initeqsvector(eqsvector,(unsigned long) alphasize,useq,ulen);
  gt_assert(maxdistance > 0);
  for (pos = startpos; /* Nothing */; pos++)
  {
    /* the scan is expected to stop within ulen + maxdistance positions */
    gt_assert(pos - startpos <= (Seqpos) (ulen + maxdistance));
    cc = getencodedchar(encseq,pos,Forwardmode);
    if (nowildcards && cc == (GtUchar) WILDCARD)
    {
      /* wildcards are not allowed: give up with an undefined result */
      result.defined = false;
      result.valueunsignedlong = 0;
      return result;
    }
    COMPUTENEWDIST(cc);
    /* stop on a small enough distance or at the end of the sequence */
    if (distval <= maxdistance || pos == totallength-1)
    {
      break;
    }
  }
  result.defined = true;
  result.valueunsignedlong = (unsigned long) (pos - startpos + 1);
  return result;
}
Ejemplo n.º 11
0
/*
  Checks that the different ways of reading encseq agree over the whole
  sequence.

  For every position the character obtained by random access
  (getencodedchar) is compared with the character obtained from the
  sequential reader (sequentialgetencodedchar). If filenametab is not NULL
  and readmode is Forwardmode, the characters are additionally compared
  with those delivered by a sequence buffer created from filenametab; in
  that case the scan ends when the buffer is exhausted, otherwise when
  totallength positions have been read. Finally the number of positions
  read must equal the total length of encseq.

  Returns 0 on success and -1 on error; in the latter case err is set.
*/
static int testfullscan(const GtStrArray *filenametab,
                        const Encodedsequence *encseq,
                        Readmode readmode,
                        GtError *err)
{
  Seqpos pos = 0, totallength;
  GtUchar ccscan = 0, ccra, ccsr;
  GtSequenceBuffer *fb = NULL;
  int retval;
  bool haserr = false;
  /* stays NULL if the scan is never started, so cleanup can tell */
  Encodedsequencescanstate *esr = NULL;
  unsigned long long fullscanpbar = 0;

  gt_error_check(err);
  totallength = getencseqtotallength(encseq);
  if (filenametab != NULL)
  {
    fb = gt_sequence_buffer_new_guess_type((GtStrArray*) filenametab, err);
    if (!fb)
      haserr = true;
    if (!haserr)
      gt_sequence_buffer_set_symbolmap(fb, getencseqAlphabetsymbolmap(encseq));
  }
  if (!haserr) {
    /* start the progress bar only when the scan really runs, so that every
       start is matched by the stop after the loop */
    gt_progressbar_start(&fullscanpbar,(unsigned long long) totallength);
    esr = newEncodedsequencescanstate();
    initEncodedsequencescanstate(esr,encseq,readmode,0);
    for (pos=0; /* Nothing */; pos++)
    {
      if (filenametab != NULL && readmode == Forwardmode)
      {
        /* the sequence buffer determines where the scan ends */
        retval = gt_sequence_buffer_next(fb,&ccscan,err);
        if (retval < 0)
        {
          haserr = true;
          break;
        }
        if (retval == 0)
        {
          break;
        }
      } else
      {
        if (pos >= totallength)
        {
          break;
        }
      }
      ccra = getencodedchar(encseq,pos,readmode); /* Random access */
      if (filenametab != NULL && readmode == Forwardmode)
      {
        if (ccscan != ccra)
        {
          gt_error_set(err,"access=%s, position=" FormatSeqpos
                            ": scan (readnextchar) = %u != "
                            "%u = random access",
                            encseqaccessname(encseq),
                            PRINTSeqposcast(pos),
                            (unsigned int) ccscan,
                            (unsigned int) ccra);
          haserr = true;
          break;
        }
      }
      ccsr = sequentialgetencodedchar(encseq,esr,pos,readmode);
      if (ccra != ccsr)
      {
        gt_error_set(err,"access=%s, mode=%s: position=" FormatSeqpos
                          ": random access = %u != %u = sequential read",
                          encseqaccessname(encseq),
                          showreadmode(readmode),
                          PRINTSeqposcast(pos),
                          (unsigned int) ccra,
                          (unsigned int) ccsr);
        haserr = true;
        break;
      }
      fullscanpbar++;
    }
    gt_progressbar_stop();
  }
  if (!haserr)
  {
    if (pos != totallength)
    {
      gt_error_set(err,"sequence length must be " FormatSeqpos " but is "
                         FormatSeqpos,
                         PRINTSeqposcast(totallength),
                         PRINTSeqposcast(pos));
      haserr = true;
    }
  }
  /* the scan state only exists if the scan was started */
  if (esr != NULL)
  {
    freeEncodedsequencescanstate(&esr);
  }
  gt_sequence_buffer_delete(fb);
  return haserr ? -1 : 0;
}
Ejemplo n.º 12
0
/*
  Derives the order of the suffixes in suftab bucket by bucket, following
  the bucket order stored in bucketspec2->order.

  For every source bucket, the sub-buckets (source,second) that are not yet
  sorted and have source != second are marked as sorted and their width is
  added to the "hard work" counter. Then forwardderive() and
  backwardderive() are called for the source bucket if the respective index
  ranges are not empty, and all sub-buckets (idx,source) as well as the
  super-bucket of source are marked as sorted. The accumulated hard work is
  finally reported via showverbose(), absolute and relative to the total
  length of the encoded sequence.

  NOTE(review): the WITHSUFFIXES debugging section refers to readmode and
  encseq, neither of which is declared in this function.
*/
void gt_copysortsuffixes(const GtBucketspec2 *bucketspec2,
                         Seqpos *suftab,
                         Verboseinfo *verboseinfo)
{
  Seqpos hardwork = 0, **targetptr;
  unsigned int ch, rank, source, other;

#ifdef WITHSUFFIXES
  {
    const Seqpos *ptr;
    for (ptr = suftab; ptr < suftab + bucketspec2->partwidth; ptr++)
    {
      showsequenceatstartpos(stdout,
                             ISDIRREVERSE(readmode) ? false : true,
                             ISDIRCOMPLEMENT(readmode) ? true : false,
                             encseq,
                             *ptr);
    }
  }
#endif
  targetptr = gt_malloc(sizeof(*targetptr) * bucketspec2->numofchars);
  for (rank = 0; rank < bucketspec2->numofchars; rank++)
  {
    source = bucketspec2->order[rank];
    /* account for the sub-buckets that had to be sorted explicitly */
    for (other = 0; other < bucketspec2->numofchars; other++)
    {
      if (bucketspec2->subbuckettab[source][other].sorted || source == other)
      {
        gt_assert(!bucketspec2->subbuckettab[source][other].hardworktodo);
        continue;
      }
      gt_assert(bucketspec2->subbuckettab[source][other].hardworktodo);
      showverbose(verboseinfo,"hard work for %u %u",source,other);
      hardwork += getendidx(bucketspec2,source,other) -
                  getstartidx(bucketspec2,source,other);
      bucketspec2->subbuckettab[source][other].sorted = true;
    }
    /* left part of the source bucket: derive in forward direction */
    if (getstartidx(bucketspec2,source,0) <
        getstartidx(bucketspec2,source,source))
    {
      ch = 0;
      while (ch < bucketspec2->numofchars)
      {
        targetptr[ch] = suftab + getstartidx(bucketspec2,ch,source);
        ch++;
      }
      forwardderive(bucketspec2,
                    targetptr,
                    source,
                    suftab + getstartidx(bucketspec2,source,0));
    }
    /* right part of the source bucket: derive in backward direction */
    if (getendidx(bucketspec2,source,source) <
        getendidx(bucketspec2,source,bucketspec2->numofchars))
    {
      ch = 0;
      while (ch < bucketspec2->numofchars)
      {
        targetptr[ch] = suftab + getendidx(bucketspec2,ch,source) - 1;
        ch++;
      }
      backwardderive(bucketspec2,
                     targetptr,
                     source,
                     suftab +
                     getendidx(bucketspec2,source,bucketspec2->numofchars) - 1);
    }
    for (ch = 0; ch < bucketspec2->numofchars; ch++)
    {
      bucketspec2->subbuckettab[ch][source].sorted = true;
    }
    bucketspec2->superbuckettab[source].sorted = true;
  }
  gt_free(targetptr);
  showverbose(verboseinfo,"hardwork = " FormatSeqpos " (%.2f)",
            PRINTSeqposcast(hardwork),
            (double) hardwork/getencseqtotallength(bucketspec2->encseq));
}
Ejemplo n.º 13
0
/*
  Builds the base index of a BWT sequence by calling createIndex with the
  data obtained from the suffix array source src.

  params      construction parameters (locate interval, feature toggles,
              context map interval, project name, sequence parameters)
  createIndex callback that performs the actual index construction
  src         source of sequence length, alphabet, statistics and readers
  rangeSort   sort mode per alphabet range
  sprTable    rank lookup for special symbols; only consulted when a sort
              mode header is needed
  err         error object; must not be NULL

  If params->locateInterval is not 0, a locate information header (and, if
  sortModeHeaderNeeded() says so, a rank sort header) is registered and the
  state for adding locate information is initialised from the suffix table
  reader of src. If params->ctxMapILog requests a context map, a context
  retriever factory is created before the index is built and queried
  afterwards.

  Returns the index produced by createIndex, or NULL if createIndex failed.
*/
extern EISeq *
createBWTSeqGeneric(const struct bwtParam *params, indexCreateFunc createIndex,
                    SASeqSrc *src,
                    const enum rangeSortMode rangeSort[],
                    const SpecialsRankLookup *sprTable,
                    GtError *err)
{
  struct encIdxSeq *baseSeqIdx = NULL;
  struct addLocateInfoState varState;
  bool varStateIsInitialized = false;
  unsigned locateInterval;
  BWTSeqContextRetrieverFactory *buildContextMap = NULL;
  gt_assert(src && params && err);
  gt_error_check(err);
  locateInterval = params->locateInterval;
  do
  {
    /* the header tables below are parallel arrays: entry 0 describes the
       locate information header, entry 1 the rank sort header */
    struct locateHeaderWriteInfo locHeaderData
      = { src, locateInterval, params->featureToggles };
    struct sortModeHeader sortModeHeader;
    void *p[] = { &locHeaderData , &sortModeHeader };
    uint16_t headerIDs[] = { LOCATE_INFO_IN_INDEX_HEADERID,
                             RANK_SORT_HEADERID };
    uint32_t headerSizes[] = { LOCATE_HEADER_SIZE,
                               0 };
    headerWriteFunc headerFuncs[] = { writeLocateInfoHeader,
                                      writeRankSortHeader };
    /* number of leading entries of the header tables actually used */
    size_t numHeaders = 0;
    unsigned bitsPerOrigRank = 0;
    Seqpos totalLen = SASSGetLength(src);
    const MRAEnc *alphabet = SASSGetMRAEnc(src);
    MRAEnc *baseAlphabet = SASSNewMRAEnc(src);
    /* FIXME: this  has to work also when locateInterval == 0 and
     * sprTable != NULL */
    if (params->ctxMapILog != CTX_MAP_ILOG_NOMAP)
      buildContextMap = newBWTSeqContextRetrieverFactory(totalLen,
                                                         params->ctxMapILog);
    if (locateInterval)
    {
      /* the locate information header is always written in this case */
      ++numHeaders;
      if (sortModeHeaderNeeded(alphabet, rangeSort, sprTable))
      {
        /* origSeqLen only exists in debug builds; it is used by the
           assertion below only */
        Seqpos
#ifndef NDEBUG
          origSeqLen = getencseqtotallength(SPRTGetOrigEncseq(sprTable)),
#endif
          maxRank;
        gt_assert(origSeqLen == totalLen - 1);
        maxRank = specialsRank(sprTable, totalLen - 1);
        bitsPerOrigRank = sortModeHeader.bitsPerOrigRank
          = requiredSeqposBits(maxRank);
        sortModeHeader.alphabet = alphabet;
        sortModeHeader.rangeSort = rangeSort;
        headerSizes[1] = computeSortModeHeaderSize(alphabet);
        ++numHeaders;
      }
      {
        SeqDataReader readSfxIdx = SASSCreateReader(src, SFX_REQUEST_SUFTAB);
        if (SDRIsValid(readSfxIdx))
        {
          initAddLocateInfoState(
            &varState, SASSGetOrigSeqAccessor(src), readSfxIdx,
            alphabet, SASSGetSeqStats(src), rangeSort, totalLen, params,
            bitsPerOrigRank?sprTable:NULL, bitsPerOrigRank,
            buildContextMap);
          varStateIsInitialized = true;
        }
        else
        {
          /* NOTE(review): the error is recorded, but execution continues
             to createIndex below with varState uninitialised and the
             locate callbacks still passed - looks like an early exit is
             missing here; confirm. */
          gt_error_set(err, "error: locate sampling requested but not available"
                    " for project %s\n", gt_str_get(params->projectName));
        }
      }
    }
    /* NOTE(review): baseAlphabet is handed to createIndex and not released
       in this function - presumably ownership is transferred; confirm. */
    if (!(baseSeqIdx
          = createIndex(totalLen, params->projectName, baseAlphabet,
                        SASSGetSeqStats(src),
                        SASSCreateReader(src, SFX_REQUEST_BWTTAB),
                        &params->seqParams, numHeaders,
                        headerIDs, headerSizes, headerFuncs, p,
                        locateInterval?addLocateInfo:NULL,
                        /* one bit per position if using bitmap */
                        (params->featureToggles & BWTLocateBitmap)?1:0,
                        locateInterval?locBitsUpperBounds:NULL, &varState,
                        err)))
      break;
    if (buildContextMap)
    {
      if (!BWTSCRFFinished(buildContextMap))
      {
        /* only reported on stderr; err is not set and the index built so
           far is still returned */
        fputs("error: context table construction incomplete!\n", stderr);
      }
      else
      {
        /* the retriever is deleted right away; presumably BWTSCRFGet is
           called for its side effects for projectName - TODO confirm */
        BWTSeqContextRetriever *ctxRetrieve =
          BWTSCRFGet(buildContextMap, NULL, params->projectName);
        deleteBWTSeqCR(ctxRetrieve);
      }
    }
  } while (0);
  /* cleanup shared by the success path and the failure path */
  if (buildContextMap)
    deleteBWTSeqContextRetrieverFactory(buildContextMap);
  if (varStateIsInitialized)
    destructAddLocateInfoState(&varState);
  return baseSeqIdx;
}
Ejemplo n.º 14
0
/*
  Fills suffixarray with the tables of the index indexname selected by the
  bit set demand.

  map         if true, the requested tables are obtained via
              genericmaptable() / mapbcktab(); if false, they are opened as
              buffered files via INITBufferedfile (the bucket table cannot
              be opened that way)
  suffixarray structure to fill; it is initialised first
  demand      combination of SARR_ESQTAB, SARR_DESTAB, SARR_SDSTAB,
              SARR_SSPTAB, SARR_SUFTAB, SARR_LCPTAB, SARR_BWTTAB and
              SARR_BCKTAB
  err         receives the error description on failure

  Returns 0 on success. On any error freesuffixarray() is called on
  suffixarray and -1 is returned.

  NOTE(review): INITBufferedfile is a macro; presumably it sets haserr and
  err when the file cannot be opened - confirm against its definition.
*/
static int inputsuffixarray(bool map,
                            Suffixarray *suffixarray,
                            unsigned int demand,
                            const GtStr *indexname,
                            Verboseinfo *verboseinfo,
                            GtError *err)
{
  bool haserr = false;
  Seqpos totallength = 0;

  gt_error_check(err);
  initsuffixarray(suffixarray);
  /* the encoded sequence is always loaded; the four demand bits select
     which of its tables are requested */
  suffixarray->encseq = mapencodedsequence(true,
                                           indexname,
                                           (demand & SARR_ESQTAB) ? true
                                                                  : false,
                                           (demand & SARR_DESTAB) ? true
                                                                  : false,
                                           (demand & SARR_SDSTAB) ? true
                                                                  : false,
                                           (demand & SARR_SSPTAB) ? true
                                                                  : false,
                                           verboseinfo,
                                           err);
  if (suffixarray->encseq == NULL)
  {
    haserr = true;
  } else
  {
    totallength = getencseqtotallength(suffixarray->encseq);
  }
  if (!haserr)
  {
    haserr = scanprjfileuintkeys(suffixarray,indexname,verboseinfo,err);
  }
  /* suffix table: totallength+1 entries of type Seqpos */
  if (!haserr && (demand & SARR_SUFTAB))
  {
    if (map)
    {
      suffixarray->suftab = genericmaptable(indexname,
                                            SUFTABSUFFIX,
                                            (unsigned long) (totallength+1),
                                            sizeof (Seqpos),
                                            err);
      if (suffixarray->suftab == NULL)
      {
        haserr = true;
      }
    } else
    {
      INITBufferedfile(indexname,&suffixarray->suftabstream,Seqpos,
                       SUFTABSUFFIX);
    }
    /* the suffix table is only accepted if "longest" is defined */
    if (!haserr && !suffixarray->longest.defined)
    {
      gt_error_set(err,"longest not defined");
      haserr = true;
    }
  }
  /* lcp table: totallength+1 entries of type GtUchar, plus the table of
     large lcp values if there are any */
  if (!haserr && (demand & SARR_LCPTAB))
  {
    if (map)
    {
      suffixarray->lcptab = genericmaptable(indexname,
                                            LCPTABSUFFIX,
                                            (unsigned long) (totallength+1),
                                            sizeof (GtUchar),
                                            err);
      if (suffixarray->lcptab == NULL)
      {
        haserr = true;
      }
    } else
    {
      INITBufferedfile(indexname,&suffixarray->lcptabstream,GtUchar,
                       LCPTABSUFFIX);
      /* position the stream behind the first entry of the lcp table */
      if (!haserr &&
          fseek(suffixarray->lcptabstream.fp,(long) sizeof (GtUchar),SEEK_SET))
      {
        gt_error_set(err,"fseek(esastream) failed: %s",strerror(errno));
        haserr = true;
      }
    }
    if (!haserr && !suffixarray->numoflargelcpvalues.defined)
    {
      gt_error_set(err,"numoflargelcpvalues not defined");
      haserr = true;
    }
    /* the large lcp value table is only read if it is not empty */
    if (!haserr && suffixarray->numoflargelcpvalues.valueseqpos > 0)
    {
      if (map)
      {
        suffixarray->llvtab
          = genericmaptable(indexname,
                            LARGELCPTABSUFFIX,
                            (unsigned long) suffixarray->numoflargelcpvalues.
                            valueseqpos,
                            sizeof (Largelcpvalue),
                            err);
        if (suffixarray->llvtab == NULL)
        {
          haserr = true;
        }
      } else
      {
        INITBufferedfile(indexname,&suffixarray->llvtabstream,Largelcpvalue,
                         LARGELCPTABSUFFIX);
      }
    }
  }
  /* BWT table: totallength+1 entries of type GtUchar */
  if (!haserr && (demand & SARR_BWTTAB))
  {
    if (map)
    {
      suffixarray->bwttab = genericmaptable(indexname,
                                            BWTTABSUFFIX,
                                            (unsigned long) (totallength+1),
                                            sizeof (GtUchar),
                                            err);
      if (suffixarray->bwttab == NULL)
      {
        haserr = true;
      }
    } else
    {
      INITBufferedfile(indexname,&suffixarray->bwttabstream,GtUchar,
                       BWTTABSUFFIX);
    }
  }
  /* bucket table: can only be mapped, a streaming request is an error */
  if (!haserr && (demand & SARR_BCKTAB))
  {
    if (map)
    {
      suffixarray->bcktab = mapbcktab(indexname,
                                      getencseqAlphabetnumofchars(suffixarray->
                                                                  encseq),
                                      suffixarray->prefixlength,
                                      err);
      if (suffixarray->bcktab == NULL)
      {
        haserr = true;
      }
    } else
    {
      gt_error_set(err,"cannot stream bcktab");
      haserr = true;
    }
  }
  /* on failure release whatever has been set up so far */
  if (haserr)
  {
    freesuffixarray(suffixarray);
  }
  return haserr ? -1 : 0;
}
Ejemplo n.º 15
0
/*
  Tool entry point: checks a packed (BWT based) index against the suffix
  array of the same project.

  After option parsing the packed index is obtained with availBWTSeq() and
  verified with BWTSeqVerifyIntegrity(). Then the suffix array (suffix table
  and encoded sequence) is mapped and params.numOfSamples patterns are drawn
  from the encoded sequence. For every pattern the matches found via the
  packed index are compared with those found by the mmsearch iterator on
  the suffix array: position by position if the index carries locate
  information, otherwise by match count only.

  argc, argv  command line of the tool
  err         receives the error description on failure

  Returns 0 on success (also if the option parser requests an exit) and -1
  on error.
*/
extern int
gt_packedindex_chk_search(int argc, const char *argv[], GtError *err)
{
  struct chkSearchOptions params;
  Suffixarray suffixarray;
  Enumpatterniterator *epi = NULL;
  bool saIsLoaded = false;
  BWTSeq *bwtSeq = NULL;
  GtStr *inputProject = NULL;
  int parsedArgs;
  bool had_err = false;
  BWTSeqExactMatchesIterator EMIter;
  bool EMIterInitialized = false;
  Verboseinfo *verbosity = NULL;
  inputProject = gt_str_new();

  /* single-pass loop: break jumps to the common cleanup at the end */
  do {
    gt_error_check(err);
    {
      bool exitNow = false;
      switch (parseChkBWTOptions(&parsedArgs, argc, argv, &params,
                                 inputProject, err))
      {
      case OPTIONPARSER_OK:
        break;
      case OPTIONPARSER_ERROR:
        had_err = true;
        exitNow = true;
        break;
      case OPTIONPARSER_REQUESTS_EXIT:
        exitNow = true;
        break;
      }
      if (exitNow)
        break;
    }
    gt_str_set(inputProject, argv[parsedArgs]);

    verbosity = newverboseinfo(params.verboseOutput);

    bwtSeq = availBWTSeq(&params.idx.final, verbosity, err);
    if ((had_err = bwtSeq == NULL))
      break;

    {
      enum verifyBWTSeqErrCode retval =
        BWTSeqVerifyIntegrity(bwtSeq, inputProject, params.flags,
                              params.progressInterval, stderr, verbosity, err);
      if ((had_err = (retval != VERIFY_BWTSEQ_NO_ERROR)))
      {
        fprintf(stderr, "index integrity check failed: %s\n",
                gt_error_get(err));
        gt_error_set(err, "aborted because of index integrity check fail");
        break;
      }
    }
    if (BWTSeqHasLocateInformation(bwtSeq))
    {
      if ((had_err = !initEmptyEMIterator(&EMIter, bwtSeq)))
      {
        gt_error_set(err, "Cannot create matches iterator for sequence index.");
        break;
      }
      EMIterInitialized = true;
    }
    {
      Seqpos totalLen, dbstart;
      unsigned long trial, patternLen;

      if ((had_err =
           mapsuffixarray(&suffixarray, SARR_SUFTAB | SARR_ESQTAB,
                          inputProject, NULL, err) != 0))
      {
        gt_error_set(err, "Can't load suffix array project with"
                  " demand for encoded sequence and suffix table files\n");
        break;
      }
      totalLen = getencseqtotallength(suffixarray.encseq);
      saIsLoaded = true;
      if ((had_err = (params.minPatLen >= 0L && params.maxPatLen >= 0L
                      && params.minPatLen > params.maxPatLen)))
      {
        gt_error_set(err, "Invalid pattern lengths selected: min=%ld, max=%ld;"
                  " min <= max is required.", params.minPatLen,
                  params.maxPatLen);
        break;
      }
      /* negative pattern lengths are replaced by values derived from the
         recommended prefix length */
      if (params.minPatLen < 0 || params.maxPatLen < 0)
      {
        unsigned int numofchars
          = getencseqAlphabetnumofchars(suffixarray.encseq);
        if (params.minPatLen < 0)
          params.minPatLen = recommendedprefixlength(numofchars, totalLen);
        if (params.maxPatLen < 0)
          params.maxPatLen =
            MAX(params.minPatLen,
                125 * recommendedprefixlength(numofchars, totalLen) / 100);
        else
          params.maxPatLen = MAX(params.maxPatLen, params.minPatLen);
      }
      fprintf(stderr, "Using patterns of lengths %lu to %lu\n",
              params.minPatLen, params.maxPatLen);
      if ((had_err = totalLen + 1 != BWTSeqLength(bwtSeq)))
      {
        gt_error_set(err, "base suffix array and index have diferrent lengths!"
                  FormatSeqpos" vs. "FormatSeqpos,  totalLen + 1,
                  BWTSeqLength(bwtSeq));
        break;
      }
      if ((had_err =
           (epi = newenumpatterniterator(params.minPatLen, params.maxPatLen,
                                         suffixarray.encseq,
                                         err)) == NULL))
      {
        fputs("Creation of pattern iterator failed!\n", stderr);
        break;
      }
      for (trial = 0; !had_err && trial < params.numOfSamples; ++trial)
      {
        const GtUchar *pptr = nextEnumpatterniterator(&patternLen, epi);
        MMsearchiterator *mmsi =
          newmmsearchiterator(suffixarray.encseq,
                              suffixarray.suftab,
                              0,  /* leftbound */
                              totalLen, /* rightbound */
                              0, /* offset */
                              suffixarray.readmode,
                              pptr,
                              patternLen);
        if (BWTSeqHasLocateInformation(bwtSeq))
        {
          Seqpos numMatches;
          if ((had_err = !reinitEMIterator(&EMIter, bwtSeq, pptr, patternLen,
                                           false)))
          {
            fputs("Internal error: failed to reinitialize pattern match"
                  " iterator", stderr);
            abort();
          }
          numMatches = EMINumMatchesTotal(&EMIter);
          gt_assert(numMatches == BWTSeqMatchCount(bwtSeq, pptr, patternLen,
                                                false));
          gt_assert(EMINumMatchesTotal(&EMIter) == countmmsearchiterator(mmsi));
/*        fprintf(stderr, "trial %lu, "FormatSeqpos" matches\n" */
/*                "pattern: ", trial, numMatches); */
/*        fprintfsymbolstring(stderr, suffixarray.alpha, pptr, */
/*                                patternLen); */
/*        putc('\n', stderr); */
          while (nextmmsearchiterator(&dbstart,mmsi))
          {
            Seqpos matchPos = 0;
            bool match = EMIGetNextMatch(&EMIter, &matchPos, bwtSeq);
            if ((had_err = !match))
            {
              gt_error_set(err,
                           "matches of packedindex expired before mmsearch!");
              break;
            }
            if ((had_err = matchPos != dbstart))
            {
              gt_error_set(err, "packedindex match doesn't equal mmsearch "
                           "match result!\n"FormatSeqpos" vs. "FormatSeqpos"\n",
                           matchPos, dbstart);
              /* stop at the first mismatch: otherwise the next iteration
                 would overwrite had_err and could hide this failure */
              break;
            }
          }
          if (!had_err)
          {
            Seqpos matchPos;
            bool trailingMatch = EMIGetNextMatch(&EMIter, &matchPos, bwtSeq);
            if ((had_err = trailingMatch))
            {
              gt_error_set(err, "matches of mmsearch expired before fmindex!");
              /* this break leaves the trial loop and skips the regular
                 release below, so release the iterator here */
              freemmsearchiterator(&mmsi);
              break;
            }
          }
        }
        else
        {
          /* without locate information only the match counts can be
             compared */
          Seqpos numFMIMatches = BWTSeqMatchCount(bwtSeq, pptr, patternLen,
                                                  false),
            numMMSearchMatches = countmmsearchiterator(mmsi);
          if ((had_err = numFMIMatches != numMMSearchMatches))
          {
            gt_error_set(err, "Number of matches not equal for suffix array ("
                      FormatSeqpos") and fmindex ("FormatSeqpos".\n",
                      numFMIMatches, numMMSearchMatches);
          }
        }
        freemmsearchiterator(&mmsi);
        if (params.progressInterval && !((trial + 1) % params.progressInterval))
          putc('.', stderr);
      }
      if (params.progressInterval)
        putc('\n', stderr);
      fprintf(stderr, "Finished %lu of %lu matchings successfully.\n",
              trial, params.numOfSamples);
    }
  } while (0);
  /* release only what has actually been set up */
  if (EMIterInitialized) destructEMIterator(&EMIter);
  if (saIsLoaded) freesuffixarray(&suffixarray);
  if (epi) freeEnumpatterniterator(&epi);
  if (bwtSeq) deleteBWTSeq(bwtSeq);
  if (verbosity) freeverboseinfo(&verbosity);
  if (inputProject) gt_str_delete(inputProject);
  return had_err?-1:0;
}