Esempio n. 1
0
/*
  Extends a match to the right, beginning directly behind its current end.

  The database is read sequentially through <esr>; the reader is repositioned
  to <dbend> (using <readmode>) only if <dbend> is smaller than <totallength>.
  The query is read beginning at position
  querysubstring->currentoffset + <matchlength>.

  Characters are compared pairwise until the database or the query is
  exhausted, the database character is special, or the two characters differ.

  Returns the number of positions by which the match could be extended.
*/
static GtUword gt_mmsearch_extendright(const GtEncseq *dbencseq,
                                       GtEncseqReader *esr,
                                       GtReadmode readmode,
                                       GtUword totallength,
                                       GtUword dbend,
                                       const GtQuerysubstring
                                         *querysubstring,
                                       GtUword matchlength)
{
  GtUword extension = 0,
          qpos = querysubstring->currentoffset + matchlength;

  if (dbend < totallength)
  {
    gt_encseq_reader_reinit_with_readmode(esr,dbencseq,readmode,dbend);
  }
  while (dbend + extension < totallength &&
         qpos < querysubstring->queryrep->seqlen)
  {
    const GtUchar cc = gt_encseq_reader_next_encoded_char(esr);

    /* a special database character always terminates the extension */
    if (ISSPECIAL(cc))
    {
      break;
    }
    if (cc != gt_mmsearch_accessquery(querysubstring->queryrep,qpos))
    {
      break;
    }
    extension++;
    qpos++;
  }
  return extension;
}
Esempio n. 2
0
/*
  Decides whether a match beginning at database position <dbstart> and at
  query position querysubstring->currentoffset is left maximal.

  Returns true if the match begins at position 0 of the database or of the
  query, if the database character preceding the match is special, or if
  that character differs from the query character preceding the match.
  Returns false otherwise.
*/
static bool gt_mmsearch_isleftmaximal(const GtEncseq *dbencseq,
                                      GtReadmode readmode,
                                      GtUword dbstart,
                                      const GtQuerysubstring *querysubstring)
{
  if (dbstart != 0 && querysubstring->currentoffset != 0)
  {
    /* Random access to the character left of the match in the database */
    const GtUchar preceding = gt_encseq_get_encoded_char(dbencseq,
                                                         dbstart-1,
                                                         readmode);

    if (!ISSPECIAL(preceding) &&
        preceding == gt_mmsearch_accessquery(querysubstring->queryrep,
                                             querysubstring->currentoffset-1))
    {
      /* identical regular characters on both sides: extendable to the left */
      return false;
    }
  }
  return true;
}
Esempio n. 3
0
/*
  Reports matches between the query <queryrep> and the database <dbencseq>.

  For every query offset at which <minmatchlength> characters still fit into
  the query, an mmsearch iterator over the <numberofsuffixes> entries of
  <suftabpart> is created and its database start positions are enumerated
  (presumably the positions whose suffix matches the query substring over
  <minmatchlength> characters -- see gt_mmsearchiterator_new). Each position
  which is left maximal is extended to the right; the resulting match is
  stored in <querymatchspaceptr> and handed to <processquerymatch> together
  with <processquerymatchinfo>.

  <queryunitnum> is the number of the first query unit. Whenever the query
  character at the current offset is SEPARATOR, the unit number is
  incremented and the offset relative to the unit restarts at 0.

  <numberofsuffixes> must be positive. If the query is shorter than
  <minmatchlength>, nothing is reported.
*/
static void gt_querysubstringmatch(bool selfmatch,
                                   const GtEncseq *dbencseq,
                                   const ESASuffixptr *suftabpart,
                                   GtReadmode readmode,
                                   GtUword numberofsuffixes,
                                   uint64_t queryunitnum,
                                   GtQueryrepresentation *queryrep,
                                   GtUword minmatchlength,
                                   GtProcessquerymatch processquerymatch,
                                   void *processquerymatchinfo,
                                   GtQuerymatch *querymatchspaceptr)
{
  GtMMsearchiterator *mmsi;
  GtUword totallength, localqueryoffset = 0;
  uint64_t localqueryunitnum = queryunitnum;
  GtQuerysubstring querysubstring;

  gt_assert(numberofsuffixes > 0);
  if (queryrep->seqlen < minmatchlength)
  {
    /* The loop bound below is computed in unsigned arithmetic and would
       wrap around to a huge value; a query this short cannot contain any
       match of the required length. */
    return;
  }
  totallength = gt_encseq_total_length(dbencseq);
  querysubstring.queryrep = queryrep;
  for (querysubstring.currentoffset = 0;
       querysubstring.currentoffset <= queryrep->seqlen - minmatchlength;
       querysubstring.currentoffset++)
  {
    GtUword dbstart;

    mmsi = gt_mmsearchiterator_new(dbencseq,
                                   suftabpart,
                                   0, /* leftbound */
                                   numberofsuffixes - 1, /* rightbound */
                                   0, /* offset */
                                   readmode,
                                   &querysubstring,
                                   minmatchlength);
    while (gt_mmsearchiterator_next(&dbstart,mmsi))
    {
      if (gt_mmsearch_isleftmaximal(dbencseq,
                                    readmode,
                                    dbstart,
                                    &querysubstring))
      {
        GtUword dbseqnum, dbseqstartpos, dbseqlen, extend;

        /* the first minmatchlength characters are already known to match,
           so the extension begins directly behind them */
        extend = gt_mmsearch_extendright(dbencseq,
                                         mmsi->esr,
                                         readmode,
                                         totallength,
                                         dbstart + minmatchlength,
                                         &querysubstring,
                                         minmatchlength);

        if (gt_encseq_has_multiseq_support(dbencseq))
        {
          /* locate the database sequence which contains the match */
          dbseqnum = gt_encseq_seqnum(dbencseq,dbstart);
          dbseqstartpos = gt_encseq_seqstartpos(dbencseq,dbseqnum);
          dbseqlen = gt_encseq_seqlength(dbencseq,dbseqnum);
        } else
        {
          dbseqnum = dbseqstartpos = dbseqlen = 0;
        }
        gt_querymatch_init(querymatchspaceptr,
                           minmatchlength + extend,
                           dbstart,
                           dbseqnum,
                           dbstart - dbseqstartpos,
                           dbseqlen,
                           0, /* score */
                           0, /* edist */
                           selfmatch,
                           localqueryunitnum,
                           minmatchlength + extend,
                           localqueryoffset,
                           queryrep->seqlen);
        processquerymatch(processquerymatchinfo,querymatchspaceptr);
      }
    }
    gt_mmsearchiterator_delete(mmsi);
    mmsi = NULL;
    /* keep track of the query unit and of the offset relative to it */
    if (gt_mmsearch_accessquery(queryrep,querysubstring.currentoffset)
        == (GtUchar) SEPARATOR)
    {
      localqueryunitnum++;
      localqueryoffset = 0;
    } else
    {
      localqueryoffset++;
    }
  }
}
Esempio n. 4
0
/*
  Reports matches between the query <queryrep> and the database <dbencseq>.

  For every query offset at which <minmatchlength> characters still fit into
  the query, an mmsearch iterator over the <numberofsuffixes> entries of
  <suftabpart> is created and its database start positions are enumerated
  (presumably the positions whose suffix matches the query substring over
  <minmatchlength> characters -- see gt_mmsearchiterator_new_generic). Each
  position which is left maximal is extended to the right; the resulting
  match is stored in <querymatchspaceptr> and handed to <processquerymatch>.

  <queryunitnum> is the number of the first query unit. Whenever the query
  character at the current offset is SEPARATOR, the unit number is
  incremented and the offset relative to the unit restarts at 0.

  <numberofsuffixes> must be positive. If the query is shorter than
  <minmatchlength>, nothing is reported and 0 is returned.

  Returns 0 on success. As soon as <processquerymatch> returns a non-zero
  value the search stops and -1 is returned; <err> is only passed on to
  <processquerymatch>.
*/
static int gt_querysubstringmatch_generic(
                                     bool selfmatch,
                                     const GtEncseq *dbencseq,
                                     const ESASuffixptr *suftabpart,
                                     GtReadmode readmode,
                                     unsigned long numberofsuffixes,
                                     uint64_t queryunitnum,
                                     const GtQueryrep *queryrep,
                                     unsigned long minmatchlength,
                                     GtProcessquerymatch processquerymatch,
                                     void *processquerymatchinfo,
                                     GtQuerymatch *querymatchspaceptr,
                                     GtError *err)
{
  GtMMsearchiterator *mmsi;
  unsigned long totallength, localqueryoffset = 0;
  uint64_t localqueryunitnum = queryunitnum;
  GtQuerysubstring querysubstring;
  bool haserr = false;

  gt_assert(numberofsuffixes > 0);
  if (queryrep->length < minmatchlength)
  {
    /* The loop bound below is computed in unsigned arithmetic and would
       wrap around to a huge value; a query this short cannot contain any
       match of the required length. */
    return 0;
  }
  totallength = gt_encseq_total_length(dbencseq);
  querysubstring.queryrep = queryrep;
  for (querysubstring.offset = 0;
       querysubstring.offset <= queryrep->length - minmatchlength;
       querysubstring.offset++)
  {
    unsigned long dbstart;

    mmsi = gt_mmsearchiterator_new_generic(dbencseq,
                                           suftabpart,
                                           0, /* leftbound */
                                           numberofsuffixes-1, /* rightbound */
                                           0, /* offset */
                                           readmode,
                                           &querysubstring,
                                           minmatchlength);
    while (!haserr && gt_mmsearchiterator_next(&dbstart,mmsi))
    {
      if (gt_mmsearch_isleftmaximal(dbencseq,
                                    readmode,
                                    dbstart,
                                    &querysubstring))
      {
        /* the first minmatchlength characters are already known to match,
           so the extension begins directly behind them */
        unsigned long extend = gt_mmsearch_extendright(dbencseq,
                                                       mmsi->esr,
                                                       readmode,
                                                       totallength,
                                                       dbstart + minmatchlength,
                                                       &querysubstring,
                                                       minmatchlength);
        gt_querymatch_fill(querymatchspaceptr,
                           minmatchlength + extend,
                           dbstart,
                           queryrep->readmode,
                           queryrep->reversecopy,
                           0, /* score */
                           0, /* edist */
                           selfmatch,
                           localqueryunitnum,
                           minmatchlength + extend,
                           localqueryoffset);
        if (processquerymatch(processquerymatchinfo,
                              dbencseq,
                              querymatchspaceptr,
                              queryrep->sequence,
                              queryrep->length,
                              err) != 0)
        {
          haserr = true;
        }
      }
    }
    gt_mmsearchiterator_delete(mmsi);
    mmsi = NULL;
    if (haserr)
    {
      /* stop right away instead of creating and deleting an iterator for
         each of the remaining query offsets */
      break;
    }
    /* keep track of the query unit and of the offset relative to it */
    if (gt_mmsearch_accessquery(queryrep,querysubstring.offset)
        == (GtUchar) SEPARATOR)
    {
      localqueryunitnum++;
      localqueryoffset = 0;
    } else
    {
      localqueryoffset++;
    }
  }
  return haserr ? -1 : 0;
}