Exemple #1
0
/*
 * Runs a bottom-up traversal over the suffix array delivered by <ssar>,
 * passing a freshly initialised ContfindBUstate to the traversal, and
 * returns the value of that state's counter afterwards.
 *
 * ssar              suffix array reader; must not be NULL.
 * show_progressbar  when true, a progress bar sized to the total length of
 *                   the encoded sequence is started before the traversal
 *                   and stopped after it.
 * contained         bit sequence stored in the traversal state; must not be
 *                   NULL.
 * firstrevcompl     stored unchanged in the traversal state.
 * read_length       0 selects the variable-length code path
 *                   (prepare_sspbittab_and_shortest + gt_esa_bottomup_rdjcv);
 *                   any other value selects gt_esa_bottomup_rdjce with
 *                   shortest = read_length and spacing = read_length + 1.
 */
unsigned long gt_contfind_bottomup(Sequentialsuffixarrayreader *ssar,
                     bool show_progressbar, GtBitsequence *contained,
                     unsigned long firstrevcompl,
                     unsigned long read_length /* 0 = variable */)
{
  ContfindBUstate bustate;
  unsigned long seqtotal;
  GT_UNUSED int rc;
  const bool variable_length = (read_length == 0);

  gt_assert(ssar != NULL);
  gt_assert(contained != NULL);

  /* these fields are filled in before the prepare helper sees the state */
  bustate.contained = contained;
  bustate.encseq = gt_encseqSequentialsuffixarrayreader(ssar);
  seqtotal = gt_encseq_total_length(bustate.encseq);
  bustate.nofsequences = gt_encseq_num_of_sequences(bustate.encseq);

  if (variable_length)
  {
    prepare_sspbittab_and_shortest(seqtotal, &bustate);
  }
  else
  {
    bustate.shortest = read_length;
    bustate.spacing = read_length + 1;
  }

  bustate.show_progressbar = show_progressbar;
  bustate.csize = 0;
  bustate.cmin = 0;
  bustate.firstrevcompl = firstrevcompl;
  bustate.counter = 0;

  if (show_progressbar)
  {
    bustate.progress = 0;
    gt_progressbar_start(&(bustate.progress), (unsigned long long) seqtotal);
  }

  if (variable_length)
  {
    rc = gt_esa_bottomup_rdjcv(ssar, &bustate, NULL);
  }
  else
  {
    rc = gt_esa_bottomup_rdjce(ssar, &bustate, NULL);
  }
  /* no error object is passed, so a failure can only be caught here */
  gt_assert(rc == 0);

  if (show_progressbar)
  {
    gt_progressbar_stop();
  }
  if (variable_length)
  {
    /* sspbittab is only set up on the variable-length path
       (presumably by prepare_sspbittab_and_shortest -- confirm) */
    gt_free(bustate.sspbittab);
  }
  return bustate.counter;
}
Exemple #2
0
/*
 * Opens the index <indexname> as a sequential suffix array reader
 * (requesting the LCP, SUF, ESQ and SSP tables), runs gt_enumeratemaxpairs
 * on it with the given minimum length and callback, and releases the
 * reader again.
 *
 * scanfile selects SEQ_scan access; otherwise SEQ_mappedboth is used.
 * Returns 0 on success and -1 if either the reader could not be created
 * or gt_enumeratemaxpairs returned a non-zero value.
 */
static int callenummaxpairs(const char *indexname,
                            unsigned int userdefinedleastlength,
                            bool scanfile,
                            Processmaxpairs processmaxpairs,
                            void *processmaxpairsinfo,
                            GtLogger *logger,
                            GtError *err)
{
  Sequentialsuffixarrayreader *reader;
  int retcode;

  gt_error_check(err);
  reader = gt_newSequentialsuffixarrayreaderfromfile(
                            indexname,
                            SARR_LCPTAB | SARR_SUFTAB |
                            SARR_ESQTAB | SARR_SSPTAB,
                            scanfile ? SEQ_scan : SEQ_mappedboth,
                            logger,
                            err);
  if (reader == NULL)
  {
    /* nothing was acquired, so there is nothing to release */
    return -1;
  }

  retcode = gt_enumeratemaxpairs(reader,
                                 gt_encseqSequentialsuffixarrayreader(reader),
                                 gt_readmodeSequentialsuffixarrayreader(reader),
                                 userdefinedleastlength,
                                 processmaxpairs,
                                 processmaxpairsinfo,
                                 err);
  gt_freeSequentialsuffixarrayreader(&reader);
  return (retcode != 0) ? -1 : 0;
}
Exemple #3
0
/*
 * Builds an OccDfsstate from the reader's encoded sequence and read mode,
 * the mer size bounds and the three caller-owned distribution arrays, then
 * runs gt_depthfirstesa over <ssar> with the occ_* callbacks.
 *
 * The state is allocated and freed inside this function; the distribution
 * arrays are only referenced, never freed here.
 * Returns 0 if the traversal returned 0, otherwise -1.
 */
static int computeoccurrenceratio(Sequentialsuffixarrayreader *ssar,
                                  unsigned long minmersize,
                                  unsigned long maxmersize,
                                  GtArrayuint64_t *uniquedistribution,
                                  GtArrayuint64_t *nonuniquedistribution,
                                  GtArrayuint64_t *nonuniquemultidistribution,
                                  GtLogger *logger,
                                  GtError *err)
{
    OccDfsstate *occstate;
    int retcode;

    gt_error_check(err);

    occstate = gt_malloc(sizeof (*occstate));
    occstate->encseq = gt_encseqSequentialsuffixarrayreader(ssar);
    occstate->readmode = gt_readmodeSequentialsuffixarrayreader(ssar);
    occstate->totallength = gt_encseq_total_length(occstate->encseq);
    occstate->minmersize = minmersize;
    occstate->maxmersize = maxmersize;
    occstate->uniquedistribution = uniquedistribution;
    occstate->nonuniquedistribution = nonuniquedistribution;
    occstate->nonuniquemultidistribution = nonuniquemultidistribution;

    retcode = gt_depthfirstesa(ssar,
                               occ_allocateDfsinfo,
                               occ_freeDfsinfo,
                               occ_processleafedge,
                               NULL,
                               occ_processcompletenode,
                               occ_assignleftmostleaf,
                               occ_assignrightmostleaf,
                               (Dfsstate*) occstate,
                               logger,
                               err);

    gt_free(occstate);
    return (retcode != 0) ? -1 : 0;
}
/*
 * Sets up a TyrDfsstate for mers of length <mersize> and runs
 * gt_depthfirstesa over <ssar> with the tyr_* callbacks.
 *
 * inputindex   only forwarded to showfinalstatistics.
 * storeindex   must be a valid string. If empty, occurrence counts go
 *              through adddistpos2distribution and showfinalstatistics is
 *              called after the traversal. If non-empty, a file with suffix
 *              MERSUFFIX is opened for writing and
 *              outputsortedstring2index is used as the count handler.
 * storecounts  with a non-empty storeindex, additionally opens a file with
 *              suffix COUNTSSUFFIX and writes the collected large counts to
 *              it after a successful traversal.
 * minocc/maxocc are stored unchanged in the state.
 * performtest  when true, a buffer of mersize elements and the reader's
 *              suffix table are made available in the state.
 *
 * Returns 0 on success; -1 if mersize exceeds the total sequence length
 * (error set in <err>), if an output file cannot be opened, or if the
 * traversal returns non-zero. All resources acquired here are released on
 * every path.
 */
static int enumeratelcpintervals(const char *inputindex,
                                 Sequentialsuffixarrayreader *ssar,
                                 const char *storeindex,
                                 bool storecounts,
                                 GtUword mersize,
                                 GtUword minocc,
                                 GtUword maxocc,
                                 bool performtest,
                                 GtLogger *logger,
                                 GtError *err)
{
  TyrDfsstate *dfs;
  unsigned int numofchars;
  bool writeindex;
  bool failed = false;

  gt_error_check(err);
  /* a non-empty storeindex means the mers are written to an index file */
  writeindex = (strlen(storeindex) != 0);

  dfs = gt_malloc(sizeof (*dfs));
  GT_INITARRAY(&dfs->occdistribution,Countwithpositions);
  GT_INITARRAY(&dfs->largecounts,Largecount);

  dfs->encseq = gt_encseqSequentialsuffixarrayreader(ssar);
  dfs->readmode = gt_readmodeSequentialsuffixarrayreader(ssar);
  dfs->esrspace = gt_encseq_create_reader_with_readmode(dfs->encseq,
                                                        dfs->readmode,
                                                        0);
  numofchars = gt_alphabet_num_of_chars(gt_encseq_alphabet(dfs->encseq));
  dfs->totallength = gt_encseq_total_length(dfs->encseq);
  dfs->mersize = mersize;
  dfs->minocc = minocc;
  dfs->maxocc = maxocc;
  dfs->storecounts = storecounts;
  dfs->performtest = performtest;
  dfs->countoutputmers = 0;
  dfs->merindexfpout = NULL;
  dfs->countsfilefpout = NULL;

  if (writeindex)
  {
    dfs->sizeofbuffer = MERBYTES(mersize);
    dfs->bytebuffer = gt_malloc(sizeof *dfs->bytebuffer
                                * dfs->sizeofbuffer);
  } else
  {
    dfs->sizeofbuffer = 0;
    dfs->bytebuffer = NULL;
  }

  if (performtest)
  {
    dfs->currentmer = gt_malloc(sizeof *dfs->currentmer * dfs->mersize);
    dfs->suftab = gt_suftabSequentialsuffixarrayreader(ssar);
  } else
  {
    dfs->currentmer = NULL;
    dfs->suftab = NULL;
  }

  if (dfs->mersize > dfs->totallength)
  {
    gt_error_set(err,"mersize "GT_WU" > "GT_WU" = totallength not allowed",
                 dfs->mersize,
                 dfs->totallength);
    failed = true;
  } else if (!writeindex)
  {
    dfs->processoccurrencecount = adddistpos2distribution;
  } else
  {
    dfs->merindexfpout = gt_fa_fopen_with_suffix(storeindex,MERSUFFIX,
                                                 "wb",err);
    if (dfs->merindexfpout == NULL)
    {
      failed = true;
    } else if (dfs->storecounts)
    {
      dfs->countsfilefpout
        = gt_fa_fopen_with_suffix(storeindex,COUNTSSUFFIX,"wb",err);
      if (dfs->countsfilefpout == NULL)
      {
        failed = true;
      }
    }
    /* assigned even if opening a file failed, as before */
    dfs->processoccurrencecount = outputsortedstring2index;
  }

  if (!failed)
  {
    if (gt_depthfirstesa(ssar,
                         tyr_allocateDfsinfo,
                         tyr_freeDfsinfo,
                         tyr_processleafedge,
                         NULL,
                         tyr_processcompletenode,
                         tyr_assignleftmostleaf,
                         tyr_assignrightmostleaf,
                         (Dfsstate*) dfs,
                         logger,
                         err) != 0)
    {
      failed = true;
    }
    /* statistics are shown regardless of the traversal's outcome */
    if (!writeindex)
    {
      showfinalstatistics(dfs,inputindex,logger);
    }
  }

  if (!failed)
  {
    if (dfs->countsfilefpout != NULL)
    {
      gt_logger_log(logger,"write "GT_WU" mercounts > "GT_WU
                    " to file \"%s%s\"",
                    dfs->largecounts.nextfreeLargecount,
                    (GtUword) MAXSMALLMERCOUNT,
                    storeindex,
                    COUNTSSUFFIX);
      gt_xfwrite(dfs->largecounts.spaceLargecount, sizeof (Largecount),
                 (size_t) dfs->largecounts.nextfreeLargecount,
                 dfs->countsfilefpout);
    }
    gt_logger_log(logger,"number of "GT_WU"-mers in index: "GT_WU"",
                  mersize,
                  dfs->countoutputmers);
    gt_logger_log(logger,"index size: %.2f megabytes\n",
                  GT_MEGABYTES(dfs->countoutputmers * dfs->sizeofbuffer +
                               sizeof (GtUword) * EXTRAINTEGERS));
    /* finally append the EXTRAINTEGERS trailing integer values
       (mer size and alphabet size) to the mer index file */
    if (dfs->merindexfpout != NULL)
    {
      outputbytewiseUlongvalue(dfs->merindexfpout,(GtUword) dfs->mersize);
      outputbytewiseUlongvalue(dfs->merindexfpout,(GtUword) numofchars);
    }
  }

  /* common cleanup for success and failure */
  gt_fa_xfclose(dfs->merindexfpout);
  gt_fa_xfclose(dfs->countsfilefpout);
  GT_FREEARRAY(&dfs->occdistribution,Countwithpositions);
  gt_free(dfs->currentmer);
  gt_free(dfs->bytebuffer);
  GT_FREEARRAY(&dfs->largecounts,Largecount);
  gt_encseq_reader_delete(dfs->esrspace);
  gt_free(dfs);
  return failed ? -1 : 0;
}