예제 #1
0
/*
 * Map the suffix array stored under <indexname> (tables ESQ, SUF and SSP)
 * and use every sequence of its encoded sequence as a query against that
 * same index.
 *
 * Sequences shorter than <userdefinedleastlength> are skipped. Every other
 * sequence is passed to gt_querysubstringmatch() as a query that refers to
 * the encoded sequence itself (start position and length of the sequence),
 * read in <queryreadmode>. <queryreadmode> must not be GT_READMODE_FORWARD
 * (asserted). <processquerymatch> and <processquerymatchinfo> are forwarded
 * unchanged.
 *
 * Returns 0 on success and -1 if mapping the index or any call of
 * gt_querysubstringmatch() reports an error.
 */
int gt_callenumselfmatches(const char *indexname,
                           GtReadmode queryreadmode,
                           unsigned int userdefinedleastlength,
                           GtProcessquerymatch processquerymatch,
                           void *processquerymatchinfo,
                           GtLogger *logger,
                           GtError *err)
{
  Suffixarray suffixarray;
  int status = 0;

  gt_assert(queryreadmode != GT_READMODE_FORWARD);
  if (gt_mapsuffixarray(&suffixarray,
                        SARR_ESQTAB | SARR_SUFTAB | SARR_SSPTAB,
                        indexname,
                        logger,
                        err) == 0)
  {
    GtQuerymatch *matchspace = gt_querymatch_new();
    const unsigned long minlength = (unsigned long) userdefinedleastlength;
    const unsigned long totalseqs
      = gt_encseq_num_of_sequences(suffixarray.encseq);
    unsigned long idx;
    GtQueryrep selfquery;

    /* The query lives inside the index: no separate byte sequence. */
    selfquery.sequence = NULL;
    selfquery.reversecopy = false;
    selfquery.encseq = suffixarray.encseq;
    selfquery.readmode = queryreadmode;
    for (idx = 0; idx < totalseqs; idx++)
    {
      const unsigned long startpos
        = gt_encseq_seqstartpos(suffixarray.encseq, idx);
      const unsigned long len
        = gt_encseq_seqlength(suffixarray.encseq, idx);

      if (len < minlength)
      {
        continue; /* too short to contain a match of the required length */
      }
      selfquery.startpos = startpos;
      selfquery.length = len;
      /* first argument: presumably selects self-match mode - TODO confirm */
      if (gt_querysubstringmatch(true,
                                 &suffixarray,
                                 (uint64_t) idx,
                                 &selfquery,
                                 minlength,
                                 processquerymatch,
                                 processquerymatchinfo,
                                 matchspace,
                                 err) != 0)
      {
        status = -1;
        break;
      }
    }
    gt_querymatch_delete(matchspace);
  } else
  {
    status = -1;
  }
  gt_freesuffixarray(&suffixarray);
  return status;
}
예제 #2
0
/*
 * Build the suffix array of <dbencseq> part by part with an Sfxiterator and
 * run gt_querysubstringmatch() for the byte sequence <query> of length
 * <querylen> against every part delivered by the iterator.
 *
 * <readmode>, <prefixlength>, <numofparts>, <maximumspace>, <sfxprogress>,
 * <withprogressbar> and <logger> are forwarded to gt_Sfxiterator_new().
 * <minlength>, <processquerymatch> and <processquerymatchinfo> are forwarded
 * to gt_querysubstringmatch().
 *
 * Returns 0 on success and -1 if the iterator cannot be created or if
 * deleting it reports an error.
 */
static int gt_constructsarrandrunmmsearch(
                 const GtEncseq *dbencseq,
                 GtReadmode readmode,
                 unsigned int prefixlength,
                 unsigned int numofparts,
                 GtUword maximumspace,
                 const GtUchar *query,
                 GtUword querylen,
                 unsigned int minlength,
                 GtProcessquerymatch processquerymatch,
                 void *processquerymatchinfo,
                 GtTimer *sfxprogress,
                 bool withprogressbar,
                 GtLogger *logger,
                 GtError *err)
{
  Sfxstrategy sfxstrategy;
  Sfxiterator *sfi;
  bool failed = false;

  /* second argument is true exactly if bitwise comparison is not possible */
  defaultsfxstrategy(&sfxstrategy, !gt_encseq_bitwise_cmp_ok(dbencseq));
  sfxstrategy.outsuftabonfile = false;
  sfi = gt_Sfxiterator_new(dbencseq,
                           readmode,
                           prefixlength,
                           numofparts,
                           maximumspace,
                           &sfxstrategy,
                           sfxprogress,
                           withprogressbar,
                           logger,
                           err);
  if (sfi != NULL)
  {
    GtQuerymatch *matchspace = gt_querymatch_new();
    const GtSuffixsortspace *part;
    GtUword partsize;
    GtQueryrepresentation queryrep;

    /* The query is a plain byte sequence, always read forward from 0. */
    queryrep.sequence = query;
    queryrep.encseq = NULL;
    queryrep.readmode = GT_READMODE_FORWARD;
    queryrep.startpos = 0;
    queryrep.seqlen = querylen;
    /* Iterate until the suffix iterator has no further part to deliver. */
    while ((part = gt_Sfxiterator_next(&partsize, NULL, sfi)) != NULL)
    {
      gt_querysubstringmatch(false,
                             dbencseq,
                             (const ESASuffixptr *)
                             gt_suffixsortspace_ulong_get(part),
                             readmode,
                             partsize,
                             0,
                             &queryrep,
                             (GtUword) minlength,
                             processquerymatch,
                             processquerymatchinfo,
                             matchspace);
    }
    gt_querymatch_delete(matchspace);
  } else
  {
    failed = true;
  }
  /* Called for a NULL iterator as well, exactly as on the success path. */
  if (gt_Sfxiterator_delete(sfi, err) != 0)
  {
    failed = true;
  }
  return failed ? -1 : 0;
}
예제 #3
0
/*
 * Runner of the repfind tool.
 *
 * <tool_arguments> is interpreted as a Maxpairsoptions object. Depending on
 * its fields the function
 *   - without query files and without samples: enumerates maximal pairs of
 *     the index (if forward is set) and/or self matches in reverse read mode
 *     (if reverse is set);
 *   - without query files but with samples: runs gt_testmaxpairs();
 *   - with query files: matches the queries against the index via
 *     gt_callenumquerymatches().
 *
 * <argc>, <argv> and <parsed_args> are only used to reject superfluous
 * arguments. Returns 0 on success and -1 on error; for superfluous
 * arguments <err> is set here, otherwise <err> is passed on to the callee
 * that failed.
 */
static int gt_repfind_runner(int argc,
                             const char **argv,
                             int parsed_args,
                             void *tool_arguments, GtError *err)
{
  bool haserr = false;
  Maxpairsoptions *arguments = tool_arguments;
  GtLogger *logger = NULL;
  GtQuerymatch *querymatchspaceptr = gt_querymatch_new();
  GtXdropmatchinfo xdropmatchinfo;

  gt_error_check(err);
  /* Resources for the xdrop-based seed extension; they are always allocated
     and released at the end, whether or not extendseed is requested. */
  xdropmatchinfo.querymatchspaceptr = querymatchspaceptr;
  xdropmatchinfo.useq = gt_seqabstract_new_empty();
  xdropmatchinfo.vseq = gt_seqabstract_new_empty();
  xdropmatchinfo.arbitscores.mat = 2;
  xdropmatchinfo.arbitscores.mis = -2;
  xdropmatchinfo.arbitscores.ins = -3;
  xdropmatchinfo.arbitscores.del = -3;
  xdropmatchinfo.frontresource = gt_frontresource_new(100UL);
  xdropmatchinfo.res = gt_xdrop_resources_new(&xdropmatchinfo.arbitscores);
  xdropmatchinfo.belowscore = 5L;
  logger = gt_logger_new(arguments->beverbose, GT_LOGGER_DEFLT_PREFIX, stdout);
  if (parsed_args < argc)
  {
    gt_error_set(err,"superfluous arguments: \"%s\"",argv[argc-1]);
    haserr = true;
  }
  if (!haserr)
  {
    if (gt_str_array_size(arguments->queryfiles) == 0)
    {
      if (arguments->samples == 0)
      {
        if (arguments->forward)
        {
          GtProcessmaxpairs processmaxpairs;
          void *processmaxpairsdata;

          /* Select the output callback and its data for maximal pairs. */
          if (arguments->searchspm)
          {
            processmaxpairs = gt_simplesuffixprefixmatchoutput;
            processmaxpairsdata = NULL;
          } else
          {
            if (arguments->extendseed)
            {
              processmaxpairs = gt_simplexdropselfmatchoutput;
              processmaxpairsdata = (void *) &xdropmatchinfo;
            } else
            {
              processmaxpairs = gt_simpleexactselfmatchoutput;
              processmaxpairsdata = (void *) querymatchspaceptr;
            }
          }
          if (gt_callenummaxpairs(gt_str_get(arguments->indexname),
                                  arguments->userdefinedleastlength,
                                  arguments->scanfile,
                                  processmaxpairs,
                                  processmaxpairsdata,
                                  logger,
                                  err) != 0)
          {
            haserr = true;
          }
        }
        if (!haserr && arguments->reverse)
        {
          /* Reverse self matches are always reported via
             gt_querymatch_output without extra data; the xdrop variant
             (gt_processxdropquerymatches with &xdropmatchinfo) is not
             enabled here, even if extendseed is set. */
          if (gt_callenumselfmatches(gt_str_get(arguments->indexname),
                                     GT_READMODE_REVERSE,
                                     arguments->userdefinedleastlength,
                                     gt_querymatch_output,
                                     NULL,
                                     logger,
                                     err) != 0)
          {
            haserr = true;
          }
        }
      } else
      {
        /* Widen before multiplying so that the product is computed in
           GtUword and cannot wrap around in the narrower type of
           userdefinedleastlength. */
        if (gt_testmaxpairs(gt_str_get(arguments->indexname),
                            arguments->samples,
                            arguments->userdefinedleastlength,
                            100 * (GtUword) arguments->userdefinedleastlength,
                            logger,
                            err) != 0)
        {
          haserr = true;
        }
      }
    } else
    {
      if (gt_callenumquerymatches(gt_str_get(arguments->indexname),
                                  arguments->queryfiles,
                                  false,
                                  true,
                                  false,
                                  arguments->userdefinedleastlength,
                                  NULL,
                                  arguments->extendseed
                                    ? gt_processxdropquerymatches
                                    : gt_querymatch_output,
                                  arguments->extendseed
                                    ? (void *) &xdropmatchinfo
                                    : NULL,
                                  logger,
                                  err) != 0)
      {
        haserr = true;
      }
    }
  }
  gt_querymatch_delete(querymatchspaceptr);
  gt_seqabstract_delete(xdropmatchinfo.useq);
  gt_seqabstract_delete(xdropmatchinfo.vseq);
  gt_xdrop_resources_delete(xdropmatchinfo.res);
  gt_frontresource_delete(xdropmatchinfo.frontresource);
  gt_logger_delete(logger);
  return haserr ? -1 : 0;
}
예제 #4
0
/*
 * Read all sequences from <queryfiles> and match each of them against the
 * already mapped <suffixarray> by calling <findquerymatches>.
 *
 * Queries shorter than <userdefinedleastlength> are skipped. For each
 * remaining query the forward strand is processed if <forwardstrand> is set
 * and the reverse complement (computed into a reusable buffer) is processed
 * if <reversestrand> is set. If <processquerybeforematching> is not NULL it
 * is called before each matching step with <processquerymatchinfo>, the
 * description, the sequence actually matched, its length, and true for the
 * forward strand or false for the reverse strand. <processquerymatch> and
 * <processquerymatchinfo> are forwarded to <findquerymatches>.
 *
 * Processing stops at the first error. Returns 0 on success and -1 if the
 * sequence iterator cannot be created, reading a query fails, or
 * <findquerymatches> returns a value different from 0.
 */
static int gt_callenumquerymatches_withindex(
                            GtQuerysubstringmatchfunc findquerymatches,
                            const Suffixarray *suffixarray,
                            const GtStrArray *queryfiles,
                            bool forwardstrand,
                            bool reversestrand,
                            unsigned int userdefinedleastlength,
                            GtProcessquerybeforematching
                               processquerybeforematching,
                            GtProcessquerymatch processquerymatch,
                            void *processquerymatchinfo,
                            GtError *err)
{
  GtSeqIterator *seqit;
  bool haserr = false;

  seqit = gt_seq_iterator_sequence_buffer_new(queryfiles, err);
  if (seqit == NULL)
  {
    haserr = true;
  } else
  {
    GtQuerymatch *querymatchspaceptr = gt_querymatch_new();
    const GtUchar *query;
    unsigned long querylen;
    int retval;
    uint64_t queryunitnum;
    GtUchar *queryreverse = NULL;
    unsigned long queryreverse_length = 0;
    char *desc = NULL;
    int mode;

    /* Deliver the queries encoded with the alphabet of the index. */
    gt_seq_iterator_set_symbolmap(seqit,
                    gt_alphabet_symbolmap(gt_encseq_alphabet(
                                                        suffixarray->encseq)));
    /* The !haserr condition stops the iteration over the queries as soon as
       a matching step failed: the break in the strand loop below only
       leaves that inner loop. */
    for (queryunitnum = 0; !haserr; queryunitnum++)
    {
      retval = gt_seq_iterator_next(seqit, &query, &querylen, &desc, err);
      if (retval < 0)
      {
        haserr = true;
        break;
      }
      if (retval == 0)
      {
        break; /* no more query sequences */
      }
      if (querylen >= (unsigned long) userdefinedleastlength)
      {
        GtQueryrep queryrep;

        queryrep.encseq = NULL;
        queryrep.readmode = GT_READMODE_FORWARD;
        queryrep.startpos = 0;
        queryrep.length = querylen;
        /* mode 0: forward strand, mode 1: reverse complement */
        for (mode = 0; mode <= 1; mode++)
        {
          if (mode == 0 && forwardstrand)
          {
            queryrep.sequence = query;
            queryrep.reversecopy = false;
            if (processquerybeforematching != NULL)
            {
              processquerybeforematching(processquerymatchinfo,desc,query,
                                         querylen,true);
            }
          } else
          {
            if (mode == 1 && reversestrand)
            {
              /* Grow the reverse-complement buffer only when needed; it is
                 reused for all queries and released after the loop. */
              if (querylen > queryreverse_length)
              {
                queryreverse = gt_realloc(queryreverse,
                                          sizeof (*queryreverse) * querylen);
                queryreverse_length = querylen;
              }
              gt_copy_reversecomplement(queryreverse,query,querylen);
              queryrep.sequence = queryreverse;
              queryrep.reversecopy = true;
              if (processquerybeforematching != NULL)
              {
                processquerybeforematching(processquerymatchinfo,desc,
                                           queryreverse,querylen,false);
              }
            } else
            {
              /* This strand was not requested: nothing to match. */
              queryrep.sequence = NULL;
              queryrep.reversecopy = false;
            }
          }
          if (queryrep.sequence != NULL)
          {
            int ret = findquerymatches(false,
                                       suffixarray,
                                       queryunitnum,
                                       &queryrep,
                                       (unsigned long) userdefinedleastlength,
                                       processquerymatch,
                                       processquerymatchinfo,
                                       querymatchspaceptr,
                                       err);
            if (ret != 0)
            {
              haserr = true;
              break;
            }
          }
        }
      }
    }
    gt_seq_iterator_delete(seqit);
    gt_free(queryreverse);
    gt_querymatch_delete(querymatchspaceptr);
  }
  return haserr ? -1 : 0;
}