Esempio n. 1
0
/*
 * Command line entry point: parses the options, loads the encoded index
 * sequence named by the first non-option argument and runs
 * EISVerifyIntegrity() on it.
 *
 * Returns 0 on success or when the option parser requests an exit,
 * and -1 on an option error, a load failure or a failed integrity check.
 * In the two latter cases a message is stored in err.
 */
extern int
gt_packedindex_chk_integrity(int argc, const char *argv[], GtError *err)
{
  struct chkIndexOptions params;
  struct encIdxSeq *seq = NULL;
  GtStr *inputProject = NULL;
  Verboseinfo *verbosity = NULL;
  int parsedArgs;
  int parseResult;
  int had_err = 0;

  gt_error_check(err);

  /* nothing has been allocated yet, so leaving early needs no cleanup */
  parseResult = parseChkIndexOptions(&parsedArgs, argc, argv, &params, err);
  if (parseResult == OPTIONPARSER_ERROR)
  {
    return -1;
  }
  if (parseResult == OPTIONPARSER_REQUESTS_EXIT)
  {
    return 0;
  }

  inputProject = gt_str_new_cstr(argv[parsedArgs]);
  verbosity = newverboseinfo(params.verboseOutput);
  seq = loadEncIdxSeq(inputProject, params.encType, params.EISFeatureSet,
                      verbosity, err);

  if (seq == NULL)
  {
    had_err = 1;
    gt_error_set(err, "Failed to load index: %s", gt_str_get(inputProject));
  }
  else
  {
    int corrupt;

    fprintf(stderr, "# Using index over sequence " FormatSeqpos
            " symbols long.\n", EISLength(seq));
    corrupt = EISVerifyIntegrity(seq, inputProject, params.skipCount,
                                 params.progressInterval, stderr,
                                 params.checkFlags, verbosity, err);
    if (corrupt != 0)
    {
      had_err = 1;
      /* print the detailed message before it is replaced by the summary */
      fputs(gt_error_get(err), stderr);
      fputs("\n", stderr);
      gt_error_set(err, "Integrity check failed for index: %s",
                   EISIntegrityCheckResultStrings[corrupt]);
    }
  }

  if (seq != NULL)
  {
    deleteEncIdxSeq(seq);
  }
  if (inputProject != NULL)
  {
    gt_str_delete(inputProject);
  }
  if (verbosity != NULL)
  {
    freeverboseinfo(&verbosity);
  }
  return had_err ? -1 : 0;
}
Esempio n. 2
0
/*
 * Tool runner: calls tyr_occratio() with the parsed options and three
 * freshly initialized uint64_t arrays, and passes those arrays to
 * showoccratios() if the call succeeded.
 *
 * Returns 0 on success and -1 if tyr_occratio() reported a failure.
 */
static int gt_tyr_occratio_runner(GT_UNUSED int argc,
                                  GT_UNUSED const char **argv,
                                  GT_UNUSED int parsed_args,
                                  void *tool_arguments,
                                  GtError *err)
{
  Tyr_occratio_options *opts = tool_arguments;
  Verboseinfo *verboseinfo;
  GtArrayuint64_t uniquedistribution,
                  nonuniquedistribution,
                  nonuniquemultidistribution;
  int status;

  verboseinfo = newverboseinfo(opts->verbose);
  GT_INITARRAY(&uniquedistribution,uint64_t);
  GT_INITARRAY(&nonuniquedistribution,uint64_t);
  GT_INITARRAY(&nonuniquemultidistribution,uint64_t);

  status = tyr_occratio(opts->str_inputindex,
                        opts->scanfile,
                        opts->minmersize,
                        opts->maxmersize,
                        &uniquedistribution,
                        &nonuniquedistribution,
                        &nonuniquemultidistribution,
                        verboseinfo,
                        err);
  if (status == 0)
  {
    showoccratios(&uniquedistribution,
                  &nonuniquedistribution,
                  &nonuniquemultidistribution,
                  opts->outputmode,
                  opts->outputvector);
  }

  /* release everything regardless of the outcome */
  freeverboseinfo(&verboseinfo);
  GT_FREEARRAY(&uniquedistribution,uint64_t);
  GT_FREEARRAY(&nonuniquedistribution,uint64_t);
  GT_FREEARRAY(&nonuniquemultidistribution,uint64_t);
  return (status != 0) ? -1 : 0;
}
Esempio n. 3
0
/*
 * Tool runner: optionally echoes the index name and all query file names
 * to stdout (verbose mode), then calls matchspacedseed() with the parsed
 * options.
 *
 * Returns 0 on success and -1 if matchspacedseed() reported a failure.
 */
static int gt_cge_spacedseed_runner(GT_UNUSED int argc,
                                    GT_UNUSED const char **argv,
                                    GT_UNUSED int parsed_args,
                                    void *tool_arguments,
                                    GtError *err)
{
  Cge_spacedseed_options *opts = tool_arguments;
  Verboseinfo *verboseinfo;
  int status;

  gt_assert(parsed_args == argc);
  verboseinfo = newverboseinfo(opts->verbose);

  if (opts->verbose)
  {
    unsigned long filenum = 0;
    const char *indexkind = opts->withesa ? "esa" : "pck";

    printf("# %sindex=%s\n", indexkind, gt_str_get(opts->str_inputindex));
    while (filenum < gt_str_array_size(opts->queryfilenames))
    {
      printf("# queryfile=%s\n",
             gt_str_array_get(opts->queryfilenames, filenum));
      filenum++;
    }
  }

  status = matchspacedseed(opts->withesa,
                           opts->docompare,
                           opts->str_inputindex,
                           opts->queryfilenames,
                           opts->verbose,
                           err);

  freeverboseinfo(&verboseinfo);
  return (status != 0) ? -1 : 0;
}
Esempio n. 4
0
extern int
gt_packedindex_chk_search(int argc, const char *argv[], GtError *err)
{
  struct chkSearchOptions params;
  Suffixarray suffixarray;
  Enumpatterniterator *epi = NULL;
  bool saIsLoaded = false;
  BWTSeq *bwtSeq = NULL;
  GtStr *inputProject = NULL;
  int parsedArgs;
  bool had_err = false;
  BWTSeqExactMatchesIterator EMIter;
  bool EMIterInitialized = false;
  Verboseinfo *verbosity = NULL;
  inputProject = gt_str_new();

  do {
    gt_error_check(err);
    {
      bool exitNow = false;
      switch (parseChkBWTOptions(&parsedArgs, argc, argv, &params,
                                 inputProject, err))
      {
      case OPTIONPARSER_OK:
        break;
      case OPTIONPARSER_ERROR:
        had_err = true;
        exitNow = true;
        break;
      case OPTIONPARSER_REQUESTS_EXIT:
        exitNow = true;
        break;
      }
      if (exitNow)
        break;
    }
    gt_str_set(inputProject, argv[parsedArgs]);

    verbosity = newverboseinfo(params.verboseOutput);

    bwtSeq = availBWTSeq(&params.idx.final, verbosity, err);
    if ((had_err = bwtSeq == NULL))
      break;

    {
      enum verifyBWTSeqErrCode retval =
        BWTSeqVerifyIntegrity(bwtSeq, inputProject, params.flags,
                              params.progressInterval, stderr, verbosity, err);
      if ((had_err = (retval != VERIFY_BWTSEQ_NO_ERROR)))
      {
        fprintf(stderr, "index integrity check failed: %s\n",
                gt_error_get(err));
        gt_error_set(err, "aborted because of index integrity check fail");
        break;
      }
    }
    if (BWTSeqHasLocateInformation(bwtSeq))
    {
      if ((had_err = !initEmptyEMIterator(&EMIter, bwtSeq)))
      {
        gt_error_set(err, "Cannot create matches iterator for sequence index.");
        break;
      }
      EMIterInitialized = true;
    }
    {
      Seqpos totalLen, dbstart;
      unsigned long trial, patternLen;

      if ((had_err =
           mapsuffixarray(&suffixarray, SARR_SUFTAB | SARR_ESQTAB,
                          inputProject, NULL, err) != 0))
      {
        gt_error_set(err, "Can't load suffix array project with"
                  " demand for encoded sequence and suffix table files\n");
        break;
      }
      totalLen = getencseqtotallength(suffixarray.encseq);
      saIsLoaded = true;
      if ((had_err = (params.minPatLen >= 0L && params.maxPatLen >= 0L
                      && params.minPatLen > params.maxPatLen)))
      {
        gt_error_set(err, "Invalid pattern lengths selected: min=%ld, max=%ld;"
                  " min <= max is required.", params.minPatLen,
                  params.maxPatLen);
        break;
      }
      if (params.minPatLen < 0 || params.maxPatLen < 0)
      {
        unsigned int numofchars
          = getencseqAlphabetnumofchars(suffixarray.encseq);
        if (params.minPatLen < 0)
          params.minPatLen = recommendedprefixlength(numofchars, totalLen);
        if (params.maxPatLen < 0)
          params.maxPatLen =
            MAX(params.minPatLen,
                125 * recommendedprefixlength(numofchars, totalLen) / 100);
        else
          params.maxPatLen = MAX(params.maxPatLen, params.minPatLen);
      }
      fprintf(stderr, "Using patterns of lengths %lu to %lu\n",
              params.minPatLen, params.maxPatLen);
      if ((had_err = totalLen + 1 != BWTSeqLength(bwtSeq)))
      {
        gt_error_set(err, "base suffix array and index have diferrent lengths!"
                  FormatSeqpos" vs. "FormatSeqpos,  totalLen + 1,
                  BWTSeqLength(bwtSeq));
        break;
      }
      if ((had_err =
           (epi = newenumpatterniterator(params.minPatLen, params.maxPatLen,
                                         suffixarray.encseq,
                                         err)) == NULL))
      {
        fputs("Creation of pattern iterator failed!\n", stderr);
        break;
      }
      for (trial = 0; !had_err && trial < params.numOfSamples; ++trial)
      {
        const GtUchar *pptr = nextEnumpatterniterator(&patternLen, epi);
        MMsearchiterator *mmsi =
          newmmsearchiterator(suffixarray.encseq,
                              suffixarray.suftab,
                              0,  /* leftbound */
                              totalLen, /* rightbound */
                              0, /* offset */
                              suffixarray.readmode,
                              pptr,
                              patternLen);
        if (BWTSeqHasLocateInformation(bwtSeq))
        {
          Seqpos numMatches;
          if ((had_err = !reinitEMIterator(&EMIter, bwtSeq, pptr, patternLen,
                                           false)))
          {
            fputs("Internal error: failed to reinitialize pattern match"
                  " iterator", stderr);
            abort();
          }
          numMatches = EMINumMatchesTotal(&EMIter);
          gt_assert(numMatches == BWTSeqMatchCount(bwtSeq, pptr, patternLen,
                                                false));
          gt_assert(EMINumMatchesTotal(&EMIter) == countmmsearchiterator(mmsi));
/*        fprintf(stderr, "trial %lu, "FormatSeqpos" matches\n" */
/*                "pattern: ", trial, numMatches); */
/*        fprintfsymbolstring(stderr, suffixarray.alpha, pptr, */
/*                                patternLen); */
/*        putc('\n', stderr); */
          while (nextmmsearchiterator(&dbstart,mmsi))
          {
            Seqpos matchPos = 0;
            bool match = EMIGetNextMatch(&EMIter, &matchPos, bwtSeq);
            if ((had_err = !match))
            {
              gt_error_set(err,
                           "matches of packedindex expired before mmsearch!");
              break;
            }
            if ((had_err = matchPos != dbstart))
            {
              gt_error_set(err, "packedindex match doesn't equal mmsearch "
                           "match result!\n"FormatSeqpos" vs. "FormatSeqpos"\n",
                           matchPos, dbstart);
            }
          }
          if (!had_err)
          {
            Seqpos matchPos;
            bool trailingMatch = EMIGetNextMatch(&EMIter, &matchPos, bwtSeq);
            if ((had_err = trailingMatch))
            {
              gt_error_set(err, "matches of mmsearch expired before fmindex!");
              break;
            }
          }
        }
        else
        {
          Seqpos numFMIMatches = BWTSeqMatchCount(bwtSeq, pptr, patternLen,
                                                  false),
            numMMSearchMatches = countmmsearchiterator(mmsi);
          if ((had_err = numFMIMatches != numMMSearchMatches))
          {
            gt_error_set(err, "Number of matches not equal for suffix array ("
                      FormatSeqpos") and fmindex ("FormatSeqpos".\n",
                      numFMIMatches, numMMSearchMatches);
          }
        }
        freemmsearchiterator(&mmsi);
        if (params.progressInterval && !((trial + 1) % params.progressInterval))
          putc('.', stderr);
      }
      if (params.progressInterval)
        putc('\n', stderr);
      fprintf(stderr, "Finished %lu of %lu matchings successfully.\n",
              trial, params.numOfSamples);
    }
  } while (0);
  if (EMIterInitialized) destructEMIterator(&EMIter);
  if (saIsLoaded) freesuffixarray(&suffixarray);
  if (epi) freeEnumpatterniterator(&epi);
  if (bwtSeq) deleteBWTSeq(bwtSeq);
  if (verbosity) freeverboseinfo(&verbosity);
  if (inputProject) gt_str_delete(inputProject);
  return had_err?-1:0;
}
Esempio n. 5
0
/*
 * Tool runner: in verbose mode echoes the effective parameters to stdout,
 * then calls merstatistics(). If that succeeded, an output index name was
 * given and the prefix length flag is not Undeterminedprefixlength, it
 * additionally calls constructmerbuckets().
 *
 * Returns 0 on success and -1 if either call reported a failure.
 */
static int gt_tyr_mkindex_runner(GT_UNUSED int argc,
                                 GT_UNUSED const char **argv,
                                 GT_UNUSED int parsed_args,
                                 void *tool_arguments,
                                 GtError *err)
{
  Tyr_mkindex_options *opts = tool_arguments;
  Verboseinfo *verboseinfo = newverboseinfo(opts->verbose);
  bool haserr;

  if (opts->verbose)
  {
    printf("# mersize=%lu\n",opts->mersize);

    /* an occurrence bound of 0 means the user did not specify one */
    if (opts->userdefinedminocc > 0)
    {
      printf("# minocc=%lu\n",opts->userdefinedminocc);
    } else
    {
      printf("# minocc=undefined\n");
    }
    if (opts->userdefinedmaxocc > 0)
    {
      printf("# maxocc=%lu\n",opts->userdefinedmaxocc);
    } else
    {
      printf("# maxocc=undefined\n");
    }

    printf("# prefixlength=");
    if (opts->prefixlength.flag == Autoprefixlength)
    {
      printf("automatic");
    } else if (opts->prefixlength.flag == Determinedprefixlength)
    {
      printf("%u",opts->prefixlength.value);
    } else
    {
      printf("undefined");
    }
    printf("\n");

    if (gt_str_length(opts->str_storeindex) > 0)
    {
      printf("# storeindex=%s\n",gt_str_get(opts->str_storeindex));
    }
    printf("# inputindex=%s\n",gt_str_get(opts->str_inputindex));
  }

  haserr = (merstatistics(opts->str_inputindex,
                          opts->mersize,
                          opts->userdefinedminocc,
                          opts->userdefinedmaxocc,
                          opts->str_storeindex,
                          opts->storecounts,
                          opts->scanfile,
                          opts->performtest,
                          verboseinfo,
                          err) != 0);

  if (!haserr &&
      gt_str_length(opts->str_storeindex) > 0 &&
      opts->prefixlength.flag != Undeterminedprefixlength)
  {
    Definedunsignedint callprefixlength;

    /* only a user-determined prefix length carries a value */
    if (opts->prefixlength.flag == Determinedprefixlength)
    {
      callprefixlength.defined = true;
      callprefixlength.valueunsignedint = opts->prefixlength.value;
    } else
    {
      callprefixlength.defined = false;
    }
    if (constructmerbuckets(opts->str_storeindex,&callprefixlength,err) != 0)
    {
      haserr = true;
    }
  }

  freeverboseinfo(&verboseinfo);
  return haserr ? -1 : 0;
}
Esempio n. 6
0
/*
 * Processes every sequence of idxlocalioptions->queryfiles against the
 * index named by idxlocalioptions->indexname.
 *
 * Depending on the options the function
 *   - maps only the encoded sequence (doonline) or creates a generic index,
 *   - calls multiapplysmithwaterman() (doonline or docompare) and/or
 *     indexbasedlocali() (not doonline, or docompare) for each query,
 *   - either shows each match (showmatch) or, with docompare, stores the
 *     matches of both methods and checks them against each other with
 *     checkandresetstorematch().
 *
 * Returns 0 on success and -1 on failure; the failing callee is expected
 * to have stored a message in err.
 *
 * Cleanup is written so that it is safe on every failure path: resources
 * are only released if they were actually created.
 */
int runidxlocali(const IdxlocaliOptions *idxlocalioptions,GtError *err)
{
  Genericindex *genericindex = NULL;
  bool haserr = false;
  Verboseinfo *verboseinfo;
  const Encodedsequence *encseq = NULL;

  verboseinfo = newverboseinfo(idxlocalioptions->verbose);

  if (idxlocalioptions->doonline)
  {
    encseq = mapencodedsequence (true,
                                 idxlocalioptions->indexname,
                                 true,
                                 false,
                                 false,
                                 true,
                                 verboseinfo,
                                 err);
    if (encseq == NULL)
    {
      haserr = true;
    }
  } else
  {
    genericindex = genericindex_new(idxlocalioptions->indexname,
                                    idxlocalioptions->withesa,
                                    idxlocalioptions->withesa ||
                                    idxlocalioptions->docompare,
                                    false,
                                    true,
                                    0,
                                    verboseinfo,
                                    err);
    if (genericindex == NULL)
    {
      haserr = true;
    } else
    {
      encseq = genericindex_getencseq(genericindex);
    }
  }
  if (!haserr)
  {
    GtSeqIterator *seqit;
    const GtUchar *query;
    unsigned long querylen;
    char *desc = NULL;
    int retval;
    Limdfsresources *limdfsresources = NULL;
    const AbstractDfstransformer *dfst;
    SWdpresource *swdpresource = NULL;
    Showmatchinfo showmatchinfo;
    Processmatch processmatch;
    void *processmatchinfoonline, *processmatchinfooffline;
    Storematchinfo storeonline, storeoffline;

    if (idxlocalioptions->docompare)
    {
      /* collect the matches of both methods separately for comparison */
      processmatch = storematch;
      initstorematch(&storeonline,encseq);
      initstorematch(&storeoffline,encseq);
      processmatchinfoonline = &storeonline;
      processmatchinfooffline = &storeoffline;
    } else
    {
      processmatch = showmatch;
      showmatchinfo.encseq = encseq;
      showmatchinfo.characters = getencseqAlphabetcharacters(encseq);
      showmatchinfo.wildcardshow = getencseqAlphabetwildcardshow(encseq);
      showmatchinfo.showalignment = idxlocalioptions->showalignment;
      processmatchinfoonline = processmatchinfooffline = &showmatchinfo;
    }
    if (idxlocalioptions->doonline || idxlocalioptions->docompare)
    {
      swdpresource = newSWdpresource(idxlocalioptions->matchscore,
                                     idxlocalioptions->mismatchscore,
                                     idxlocalioptions->gapextend,
                                     idxlocalioptions->threshold,
                                     idxlocalioptions->showalignment,
                                     processmatch,
                                     processmatchinfoonline);
    }
    dfst = locali_AbstractDfstransformer();
    if (!idxlocalioptions->doonline || idxlocalioptions->docompare)
    {
      gt_assert(genericindex != NULL);
      limdfsresources = newLimdfsresources(genericindex,
                                           true,
                                           0,
                                           0,    /* maxpathlength */
                                           true, /* keepexpandedonstack */
                                           processmatch,
                                           processmatchinfooffline,
                                           NULL, /* processresult */
                                           NULL, /* processresult info */
                                           dfst);
    }
    seqit = gt_seqiterator_new(idxlocalioptions->queryfiles, err);
    if (seqit == NULL)
    {
      haserr = true;
    } else
    {
      gt_seqiterator_set_symbolmap(seqit, getencseqAlphabetsymbolmap(encseq));
      /* queryunit is also the running number of the current query */
      for (showmatchinfo.queryunit = 0; /* Nothing */;
           showmatchinfo.queryunit++)
      {
        retval = gt_seqiterator_next(seqit,
                                     &query,
                                     &querylen,
                                     &desc,
                                     err);
        if (retval < 0)
        {
          haserr = true;
          break;
        }
        if (retval == 0)
        {
          break;
        }
        printf("process sequence " Formatuint64_t " of length %lu\n",
                PRINTuint64_tcast(showmatchinfo.queryunit),querylen);
        if (idxlocalioptions->doonline || idxlocalioptions->docompare)
        {
          multiapplysmithwaterman(swdpresource,encseq,query,querylen);
        }
        if (!idxlocalioptions->doonline || idxlocalioptions->docompare)
        {
          indexbasedlocali(limdfsresources,
                           idxlocalioptions->matchscore,
                           idxlocalioptions->mismatchscore,
                           idxlocalioptions->gapstart,
                           idxlocalioptions->gapextend,
                           idxlocalioptions->threshold,
                           query,
                           querylen,
                           dfst);
        }
        if (idxlocalioptions->docompare)
        {
          checkandresetstorematch(showmatchinfo.queryunit,
                                  &storeonline,&storeoffline);
        }
        gt_free(desc);
      }
    }
    /* released here, outside the iterator branch, so that they are not
       leaked when the sequence iterator could not be created */
    if (limdfsresources != NULL)
    {
      freeLimdfsresources(&limdfsresources,dfst);
    }
    if (swdpresource != NULL)
    {
      freeSWdpresource(swdpresource);
      swdpresource = NULL;
    }
    if (seqit != NULL)
    {
      gt_seqiterator_delete(seqit);
    }
    if (idxlocalioptions->docompare)
    {
      freestorematch(&storeonline);
      freestorematch(&storeoffline);
    }
  }
  if (genericindex != NULL)
  {
    genericindex_delete(genericindex);
  } else if (encseq != NULL)
  {
    /* encseq was mapped directly (doonline); it is NULL if loading failed,
       in which case there is nothing to free */
    encodedsequence_free((Encodedsequence **) &encseq);
  }
  freeverboseinfo(&verboseinfo);
  return haserr ? -1 : 0;
}