Exemple #1
0
/*
  Sum up the cost of <alignment> by visiting its multi edit operation list
  from the last entry down to the first.
  Every step of a Mismatch, Deletion or Insertion entry costs 1.
  For Match and Replacement entries the aligned characters of alignment->u
  and alignment->v are compared pairwise:
  - if <mapped> is set, a pair costs 1 when either character satisfies
    ISSPECIAL or when the two characters differ;
  - otherwise a pair costs 1 when the characters differ, where both are
    passed through tolower() first if <downcase> is set.
  <mapped> and <downcase> must not both be true (asserted).
  Returns the accumulated cost.
*/
GtUword gt_alignment_eval_generic(bool mapped,bool downcase,
                                  const GtAlignment *alignment)
{
  GtUword remaining, step, upos = 0, vpos = 0, cost = 0;

  gt_assert(alignment != NULL && (!mapped || !downcase));
#ifndef NDEBUG
  gt_assert(gt_alignment_is_valid(alignment));
#endif

  for (remaining = gt_multieoplist_get_num_entries(alignment->eops);
       remaining > 0; remaining--)
  {
    const GtMultieop eop = gt_multieoplist_get_entry(alignment->eops,
                                                     remaining - 1);

    switch (eop.type) {
      case Deletion:
        /* consumes characters of u only */
        cost += eop.steps;
        upos += eop.steps;
        break;
      case Insertion:
        /* consumes characters of v only */
        cost += eop.steps;
        vpos += eop.steps;
        break;
      case Mismatch:
        cost += eop.steps;
        upos += eop.steps;
        vpos += eop.steps;
        break;
      case Match:
      case Replacement:
        /* the entry type is not trusted here: each pair is re-compared */
        for (step = 0; step < eop.steps; step++, upos++, vpos++)
        {
          GtUchar cu = alignment->u[upos],
                  cv = alignment->v[vpos];
          bool differs;

          if (mapped)
          {
            differs = ISSPECIAL(cu) || ISSPECIAL(cv) || cu != cv;
          } else
          {
            if (downcase)
            {
              cu = tolower((int) cu);
              cv = tolower((int) cv);
            }
            differs = cu != cv;
          }
          if (differs)
          {
            cost++;
          }
        }
        break;
    }
  }
  return cost;
}
Exemple #2
0
/*
  Compute the integer code for the <kmersize> positions of <encseq>
  (accessed in <readmode>) beginning at <startpos>. The first position
  initialises the code, every further position is appended via ADDNEXTCHAR.
  As soon as a position is at or beyond <totallength>, or holds a character
  satisfying ISSPECIAL, this position and all following ones contribute the
  value numofchars-1 and the sequence is no longer read.
*/
static GtCodetype qgram2codefillspecial(unsigned int numofchars,
                                      unsigned int kmersize,
                                      const GtEncseq *encseq,
                                      GtReadmode readmode,
                                      GtUword startpos,
                                      GtUword totallength)
{
  GtCodetype code;
  GtUword idx;
  GtUchar cc = 0;
  bool fill = true; /* true once the remaining positions are padded */

  if (startpos < totallength)
  {
    cc = gt_encseq_get_encoded_char(encseq,startpos,readmode);
    if (!ISSPECIAL(cc))
    {
      fill = false;
    }
  }
  if (fill)
  {
    code = (GtCodetype) (numofchars - 1);
  } else
  {
    code = (GtCodetype) cc;
  }
  for (idx = startpos + 1; idx < startpos + kmersize; idx++)
  {
    if (!fill)
    {
      if (idx >= totallength)
      {
        fill = true;
      } else
      {
        cc = gt_encseq_get_encoded_char(encseq,idx,readmode);
        if (ISSPECIAL(cc))
        {
          fill = true;
        }
      }
    }
    if (fill)
    {
      ADDNEXTCHAR(code,numofchars-1,numofchars);
    } else
    {
      ADDNEXTCHAR(code,cc,numofchars);
    }
  }
  return code;
}
/*
  Match the query characters in [qstart, qend) against <genericindex>,
  which is accessed as an Fmindex. The first character selects the interval
  [tfreq[cc], tfreq[cc+1]); each following character narrows the interval
  using tfreq and fmoccurrence. Matching stops at the first character which
  satisfies ISSPECIAL or which would make the interval empty.
  Returns 0 if the first character is special or has an empty interval,
  otherwise the number of query characters consumed before stopping.
  In the latter case, if <witnessposition> is not NULL, it receives
  (bwtlength-1) - (startpos + matchlength), where startpos is obtained via
  gt_fmfindtextpos from the lower bound of the last non-empty interval.
  <offset>, <left> and <right> are unused.
*/
GtUword gt_skfmmstats (const void *genericindex,
                          GT_UNUSED GtUword offset,
                          GT_UNUSED GtUword left,
                          GT_UNUSED GtUword right,
                          GtUword *witnessposition,
                          const GtUchar *qstart,
                          const GtUchar *qend)
{
  const Fmindex *fmindex = (Fmindex *) genericindex;
  const GtUchar *qptr = qstart;
  GtUlongBound itv;
  GtUword lastlbound, matchlength;
  GtUchar cc;

  gt_assert(qstart < qend);
  cc = *qptr;
  if (ISSPECIAL(cc))
  {
    return 0;
  }
  itv.lbound = fmindex->tfreq[cc];
  itv.ubound = fmindex->tfreq[cc+1];
  if (itv.lbound >= itv.ubound)
  {
    return 0;
  }
  lastlbound = itv.lbound;
  qptr++;
  while (qptr < qend)
  {
    cc = *qptr;
    if (ISSPECIAL (cc))
    {
      break;
    }
    itv.lbound = fmindex->tfreq[cc] +
                 fmoccurrence (fmindex, cc, itv.lbound);
    itv.ubound = fmindex->tfreq[cc] +
                 fmoccurrence (fmindex, cc, itv.ubound);
    if (itv.lbound >= itv.ubound)
    {
      break;
    }
    /* remember the lower bound of the last interval that was not empty */
    lastlbound = itv.lbound;
    qptr++;
  }
  matchlength = (GtUword) (qptr - qstart);
  if (witnessposition != NULL)
  {
    const GtUword startpos = gt_fmfindtextpos (fmindex,lastlbound);

    gt_assert((fmindex->bwtlength-1) >= (startpos + matchlength));
    *witnessposition = (fmindex->bwtlength-1) - (startpos + matchlength);
  }
  return matchlength;
}
Exemple #4
0
/*
  Print the subtree below <node> to stdout, one line per child, indented
  by 6 * <level> characters. For each child the characters from position
  suffixinfo.startpos + node->depth up to (excluding) the end position are
  printed via the <characters> table; the end position is the total length
  of the child's sequence for a leaf and suffixinfo.startpos + depth for an
  inner node. A character satisfying ISSPECIAL is shown as '#', ends the
  line and stops the output for this edge. A leaf whose last read character
  was not special is terminated by '~'; an inner node is followed by its
  depth and ident and then shown recursively at level+1.
  NOTE(review): cc is not reset between siblings, so a leaf with an empty
  edge label inherits the cc of the previous sibling -- confirm whether
  this is intended.
*/
static void showmergertrie2(const Mergertrierep *trierep,
                            const GtUchar *characters,
                            unsigned int level,
                            const Mergertrienode *node)
{
  GtUchar cc = 0;
  GtUword pos, endpos;
  Mergertrienode *child;
  const int indent = (int) (6 * level);

  child = node->firstchild;
  while (child != NULL)
  {
    printf("%*.*s",indent,indent," ");
    if (!MTRIE_ISLEAF(child))
    {
      endpos = child->suffixinfo.startpos + child->depth;
    } else
    {
      endpos = gt_encseq_total_length(
                                 trierep->encseqtable[child->suffixinfo.idx]);
    }
    pos = child->suffixinfo.startpos + node->depth;
    while (pos < endpos)
    {
      cc = gt_encseq_get_encoded_char(
              trierep->enseqreadinfo[child->suffixinfo.idx].encseqptr,
              pos,
              trierep->enseqreadinfo[child->suffixinfo.idx].readmode);
      if (ISSPECIAL(cc))
      {
        printf("#\n");
        break;
      }
      printf("%c",characters[(int) cc]);
      pos++;
    }
    if (!MTRIE_ISLEAF(child))
    {
      printf(" d="GT_WU",i=" Formatuint64_t "\n",
            child->depth,
            PRINTuint64_tcast(child->suffixinfo.ident));
      showmergertrie2(trierep,characters,level+1,child);
    } else
    {
      if (!ISSPECIAL(cc))
      {
        printf("~\n");
      }
    }
    child = child->rightsibling;
  }
}
Exemple #5
0
/*
  Starting with code, depth and lowerbound taken from <bd>, step a
  Bwtseqcontextiterator created on <voidbwtseq> until depth reaches
  pckbt->maxdepth or a character satisfying ISSPECIAL is delivered.
  For every non-special character the depth is incremented, the code is
  extended by the character (code * numofchars + cc), upperbound is set to
  lowerbound+1 and the resulting bounds are stored in <pckbt>.
*/
static void followleafedge(Pckbuckettable *pckbt,const void *voidbwtseq,
                           const Boundsatdepth *bd)
{
  Boundsatdepth leaf;
  Bwtseqcontextiterator *iter;

  leaf.code = bd->code;
  leaf.depth = bd->depth;
  leaf.lowerbound = bd->lowerbound;
  iter = newBwtseqcontextiterator(voidbwtseq,leaf.lowerbound);
  for (;;)
  {
    GtUchar cc;

    if (leaf.depth >= pckbt->maxdepth)
    {
      break;
    }
    /* the depth is advanced before the character is known to be usable */
    leaf.depth++;
    cc = nextBwtseqcontextiterator(&leaf.lowerbound,iter);
    if (ISSPECIAL(cc))
    {
      break;
    }
    leaf.code = leaf.code * pckbt->numofchars + cc;
    leaf.upperbound = leaf.lowerbound+1;
    storeBoundsatdepth(pckbt,&leaf);
  }
  freeBwtseqcontextiterator(&iter);
}
static void special_queue_verify(const GtKmerstream *kmerstream)
{
  unsigned int i, contextsize = 0, verified = 0;
  GtSpecialcontext *queueptr = kmerstream->specialqueue.dequeueptr;

  gt_assert(kmerstream->kmersize == kmerstream->windowwidth);
  for (i=0; i < kmerstream->kmersize; i++)
  {
    GtUchar cc = kmerstream->cyclicwindow[(kmerstream->firstindex+i) %
                                           kmerstream->kmersize];
    if (ISSPECIAL(cc))
    {
      gt_assert (queueptr->lengthofleftcontext == contextsize);
      if (queueptr > kmerstream->specialqueue.queuespace)
      {
        queueptr--;
      } else
      {
        queueptr = kmerstream->specialqueue.queuespace +
                   kmerstream->specialqueue.queuesize - 1;
      }
      verified++;
      contextsize = 0;
    } else
    {
      contextsize++;
    }
  }
  gt_assert(verified == kmerstream->specialqueue.noofelements);
}
Exemple #7
0
/*
  Starting with code, depth and lowerbound taken from <bd>, step a
  Bwtseqcontextiterator created on <fmindex> until depth reaches
  pckbt->maxdepth or a character satisfying ISSPECIAL is delivered.
  For every non-special character the depth is incremented, the code is
  extended by the character (code * numofchars + cc), upperbound is set to
  lowerbound+1 and the resulting bounds are stored in <pckbt>.
*/
static void pckbuckettable_followleafedge(Pckbuckettable *pckbt,
                                          const FMindex *fmindex,
                                          const Pckbck_Boundsatdepth *bd)
{
  Pckbck_Boundsatdepth leaf;
  Bwtseqcontextiterator *iter;

  gt_assert(bd != NULL);
  leaf.code = bd->code;
  leaf.depth = bd->depth;
  leaf.lowerbound = bd->lowerbound;
  iter = gt_Bwtseqcontextiterator_new(fmindex,leaf.lowerbound);
  for (;;)
  {
    GtUchar cc;

    if (leaf.depth >= pckbt->maxdepth)
    {
      break;
    }
    /* the depth is advanced before the character is known to be usable */
    leaf.depth++;
    cc = gt_Bwtseqcontextiterator_next(&leaf.lowerbound,iter);
    if (ISSPECIAL(cc))
    {
      break;
    }
    leaf.code = leaf.code * pckbt->numofchars + cc;
    leaf.upperbound = leaf.lowerbound+1;
    pckbuckettable_storeBoundsatdepth(pckbt,&leaf);
  }
  gt_Bwtseqcontextiterator_delete(iter);
  iter = NULL;
}
Exemple #8
0
/* Match STRING against the pattern EXPR; return 1 on a match, else 0.

   Pattern characters for which ISSPECIAL holds are handled as follows:
   '/' matches either '/' or '\\' in STRING;
   '*' as the last pattern character matches the whole rest of STRING;
   otherwise '*' skips at least one character of STRING, up to the first
   later occurrence of the pattern character following the '*', and
   fails if STRING ends before such an occurrence.
   Every other pattern character must match literally, and the
   terminating NUL of both strings must be reached together.

   The '*' case never reads beyond the terminating NUL of STRING: the
   end of STRING is checked before the pointer is advanced.

   NOTE(review): if ISSPECIAL accepts a character other than '/' and
   '*', neither pointer advances for it and the loop does not terminate
   -- confirm against the definition of ISSPECIAL.  */
int ExprMatch (char *string, char *expr)
{
  while (1)
    {
      if (ISSPECIAL(*expr))
        {
          if (*expr == '/')
            {
              if (*string != '\\' && *string != '/')
                return 0;
              string ++; expr++;
            }
          else if (*expr == '*')
            {
              /* Look at the character AFTER the star; a trailing star
                 matches whatever is left, including nothing.  */
              expr++;
              if (*expr == 0)
                return 1;
              /* Skip at least one character, then search for *expr.
                 Test for the end of STRING before stepping forward.  */
              do
                {
                  if (*string == 0)
                    return 0;
                  string++;
                }
              while (*string != *expr);
            }
        }
      else
        {
          if (*string != *expr)
            return 0;
          if (*expr++ == 0)
            return 1;
          string++;
        }
    }
}
Exemple #9
0
/*
  Extend a match to the right. Database characters are read sequentially
  via <esr> (re-initialised at <dbend> if dbend < totallength) and compared
  with the query characters starting at currentoffset + matchlength of
  <querysubstring>. The extension stops at the end of the database or of
  the query, at a database character satisfying ISSPECIAL, or at the first
  differing pair. Returns the number of positions by which the match was
  extended.
*/
static GtUword gt_mmsearch_extendright(const GtEncseq *dbencseq,
                                             GtEncseqReader *esr,
                                             GtReadmode readmode,
                                             GtUword totallength,
                                             GtUword dbend,
                                             const GtQuerysubstring
                                               *querysubstring,
                                             GtUword matchlength)
{
  GtUword dbpos, querypos;

  if (dbend < totallength)
  {
    gt_encseq_reader_reinit_with_readmode(esr,dbencseq,readmode,dbend);
  }
  dbpos = dbend;
  querypos = querysubstring->currentoffset + matchlength;
  while (dbpos < totallength &&
         querypos < querysubstring->queryrep->seqlen)
  {
    const GtUchar dbchar = gt_encseq_reader_next_encoded_char(esr);

    if (ISSPECIAL(dbchar) ||
        dbchar != gt_mmsearch_accessquery(querysubstring->queryrep,querypos))
    {
      break;
    }
    dbpos++;
    querypos++;
  }
  return dbpos - dbend;
}
Exemple #10
0
/*
  Deliver the query character at logical position <pos> of <queryrep>.
  The absolute position is startpos + pos for GT_READMODE_FORWARD and
  startpos + GT_REVERSEPOS(seqlen,pos) for all other read modes. The
  character is taken from queryrep->sequence if that is not NULL, and from
  queryrep->encseq (read in forward direction) otherwise. If the read mode
  satisfies GT_ISDIRCOMPLEMENT, a character not satisfying ISSPECIAL is
  returned as GT_COMPLEMENTBASE of itself; in all other cases the character
  is returned unchanged.
*/
static GtUchar gt_mmsearch_accessquery(const GtQueryrepresentation *queryrep,
                                       GtUword pos)
{
  GtUword abspos, cc;

  gt_assert(queryrep != NULL);
  gt_assert(pos < queryrep->seqlen);
  abspos = queryrep->startpos;
  if (queryrep->readmode == GT_READMODE_FORWARD)
  {
    abspos += pos;
  } else
  {
    abspos += GT_REVERSEPOS(queryrep->seqlen,pos);
  }
  if (queryrep->sequence == NULL)
  {
    gt_assert(queryrep->encseq != NULL);
    cc = gt_encseq_get_encoded_char(queryrep->encseq,abspos,
                                    GT_READMODE_FORWARD);
  } else
  {
    cc = queryrep->sequence[abspos];
  }
  /* only non-special characters are complemented */
  if (!GT_ISDIRCOMPLEMENT(queryrep->readmode) || ISSPECIAL(cc))
  {
    return cc;
  }
  return GT_COMPLEMENTBASE(cc);
}
Exemple #11
0
/*
  For every position of <query> (of length <querylen>) whose character
  does not satisfy ISSPECIAL, call gt_esa2shulengthatposition on the query
  suffix starting there, using offset 0 and the interval 0..totallength of
  <suffixarray>, and add up the results. Positions holding a special
  character contribute 0. Returns the sum.
*/
static unsigned long gt_esa2shulengthquery(const Suffixarray *suffixarray,
                                           const GtUchar *query,
                                           unsigned long querylen)
{
  unsigned long idx, sum = 0;
  const unsigned long totallength
    = gt_encseq_total_length(suffixarray->encseq);

  for (idx = 0; idx < querylen; idx++)
  {
    const GtUchar *suffix = query + idx;

    if (!ISSPECIAL(*suffix))
    {
      sum += gt_esa2shulengthatposition(suffixarray,
                                        totallength,
                                        0,
                                        0,
                                        totallength,
                                        suffix,
                                        query+querylen);
    }
  }
  return sum;
}
/*
  Transform the sequence stored in <bare_encseq> in place:
  - <forward> set (<direct> must then be false): every character
    satisfying ISNOTSPECIAL is replaced by its GT_COMPLEMENTBASE;
  - <forward> not set, <direct> set: the sequence is reversed;
  - neither set: the sequence is reversed and every character not
    satisfying ISSPECIAL is replaced by its GT_COMPLEMENTBASE.
  An empty sequence is left untouched; in particular no pointer before the
  start of the sequence is formed for it.
*/
void bare_encseq_convert(GtBareEncseq *bare_encseq,bool forward,bool direct)
{
  GtUchar *leftptr, *rightptr;

  if (forward)
  {
    gt_assert(!direct);
    for (leftptr = bare_encseq->sequence;
         leftptr < bare_encseq->sequence + bare_encseq->totallength;
         leftptr++)
    {
      if (ISNOTSPECIAL(*leftptr))
      {
        *leftptr = GT_COMPLEMENTBASE(*leftptr);
      }
    }
    return;
  }
  if (bare_encseq->totallength == 0)
  {
    /* nothing to reverse; sequence + totallength - 1 would point before
       the beginning of the sequence, which is undefined behavior */
    return;
  }
  leftptr = bare_encseq->sequence;
  rightptr = bare_encseq->sequence + bare_encseq->totallength - 1;
  if (direct)
  {
    for (/* Nothing */; leftptr < rightptr; leftptr++, rightptr--)
    {
      GtUchar tmp = *leftptr;
      *leftptr = *rightptr;
      *rightptr = tmp;
    }
  } else
  {
    /* <= so that the middle character of an odd length sequence is
       complemented as well */
    for (/* Nothing */; leftptr <= rightptr; leftptr++, rightptr--)
    {
      GtUchar tmp = *leftptr;
      *leftptr = ISSPECIAL(*rightptr) ? *rightptr
                                      : GT_COMPLEMENTBASE(*rightptr);
      *rightptr = ISSPECIAL(tmp) ? tmp
                                 : GT_COMPLEMENTBASE(tmp);
    }
  }
}
Exemple #13
0
/*
  Three-way comparison of two characters with their indices.
  Two characters not satisfying ISSPECIAL are ordered by their value
  (-1, 0 or 1). A special character is larger than any non-special one.
  Two special characters are never equal: the result is -1 if idx1 <= idx2
  and 1 otherwise.
*/
static int mtrie_comparecharacters(GtUchar cc1,unsigned int idx1,
                                   GtUchar cc2,unsigned int idx2)
{
  const int special1 = ISSPECIAL(cc1) ? 1 : 0,
            special2 = ISSPECIAL(cc2) ? 1 : 0;

  if (special1 && special2)
  {
    return idx1 <= idx2 ? -1 : 1;
  }
  if (special1)
  {
    return 1; /* cc1 > cc2 */
  }
  if (special2)
  {
    return -1; /* cc1 < cc2 */
  }
  if (cc1 == cc2)
  {
    return 0;
  }
  return cc1 < cc2 ? -1 : 1;
}
/*
  Read the query characters in [qstart, qend) and narrow an interval of
  <genericindex> (accessed as an Fmindex) using tfreq and fmoccurrence for
  as long as the interval holds more than one element and query characters
  remain. Returns 0 as soon as a character satisfying ISSPECIAL is read.
  Returns the number of query characters consumed if the final interval
  contains exactly one element, and 0 otherwise.
  <offset>, <left>, <right> and <witnessposition> are unused.
*/
GtUword gt_skfmuniqueforward (const void *genericindex,
                                 GT_UNUSED GtUword offset,
                                 GT_UNUSED GtUword left,
                                 GT_UNUSED GtUword right,
                                 GT_UNUSED GtUword *witnessposition,
                                 const GtUchar *qstart,
                                 const GtUchar *qend)
{
  const Fmindex *fmindex = (Fmindex *) genericindex;
  const GtUchar *qptr = qstart;
  GtUlongBound itv;
  GtUchar cc;

  gt_assert(qstart < qend);
  cc = *qptr;
  qptr++;
  if (ISSPECIAL(cc))
  {
    return 0;
  }
  itv.lbound = fmindex->tfreq[cc];
  itv.ubound = fmindex->tfreq[cc+1];
  for (/* Nothing */; qptr < qend && itv.lbound + 1 < itv.ubound; qptr++)
  {
    cc = *qptr;
    if (ISSPECIAL (cc))
    {
      return 0;
    }
    itv.lbound = fmindex->tfreq[cc] +
                 fmoccurrence (fmindex, cc, itv.lbound);
    itv.ubound = fmindex->tfreq[cc] +
                 fmoccurrence (fmindex, cc, itv.ubound);
  }
  if (itv.lbound + 1 != itv.ubound)
  {
    return 0;
  }
  return (GtUword) (qptr - qstart);
}
/*
  Compute the integer code of the <kmersize> characters stored in
  <cyclicwindow>, read in cyclic order starting at <firstindex>. The first
  character initialises the code, every further character is appended via
  ADDNEXTCHAR. From the first character satisfying ISSPECIAL onwards, this
  and all following positions contribute the value numofchars-1 and the
  window is no longer read.
*/
static GtCodetype windowkmer2code(unsigned int numofchars,
                                unsigned int kmersize,
                                const GtUchar *cyclicwindow,
                                unsigned int firstindex)
{
  unsigned int idx;
  GtCodetype code;
  GtUchar cc = cyclicwindow[firstindex];
  bool fill; /* true once the remaining positions are padded */

  if (ISSPECIAL(cc))
  {
    fill = true;
    code = (GtCodetype) (numofchars-1);
  } else
  {
    fill = false;
    code = (GtCodetype) cc;
  }
  for (idx = 1U; idx < kmersize; idx++)
  {
    if (!fill)
    {
      cc = cyclicwindow[(firstindex+idx) % kmersize];
      if (ISSPECIAL(cc))
      {
        fill = true;
      }
    }
    if (fill)
    {
      ADDNEXTCHAR(code,numofchars-1,numofchars);
    } else
    {
      ADDNEXTCHAR(code,cc,numofchars);
    }
  }
  return code;
}
Exemple #16
0
/*
  Return the score for aligning character <a> with character <b>.
  If <scorehandler> has a score matrix, the score is looked up there (the
  handler must then be flagged as mappedsequence). Without a score matrix:
  - for mapped sequences the mismatch score is returned if either character
    satisfies ISSPECIAL or the characters differ, else the match score;
  - otherwise the characters are compared (after tolower() if downcase is
    set) and the mismatch or match score is returned accordingly.
*/
GtWord gt_scorehandler_get_replacement(const GtScoreHandler *scorehandler,
                                       GtUchar a, GtUchar b)
{
  gt_assert(scorehandler != NULL);
  if (scorehandler->scorematrix != NULL)
  {
    gt_assert(scorehandler->mappedsequence);
    return gt_score_matrix_get_score(scorehandler->scorematrix,a,b);
  }
  if (scorehandler->mappedsequence)
  {
    if (ISSPECIAL(a) || ISSPECIAL(b) || a != b)
    {
      return scorehandler->mismatchscore;
    }
    return scorehandler->matchscore;
  }
  if (scorehandler->downcase)
  {
    a = tolower((int) a);
    b = tolower((int) b);
  }
  if (a != b)
  {
    return scorehandler->mismatchscore;
  }
  return scorehandler->matchscore;
}
/*
  Scan the first <windowwidth> characters of <cyclicwindow>, read in cyclic
  order (modulo <kmersize>) starting at <firstindex>. Returns the offset of
  the first character satisfying ISSPECIAL, or <kmersize> if there is none.
*/
static unsigned int determinefirstspecialposition(unsigned int windowwidth,
                                                  unsigned int kmersize,
                                                  const GtUchar *cyclicwindow,
                                                  unsigned int firstindex)
{
  unsigned int offset = 0;

  while (offset < windowwidth &&
         !ISSPECIAL(cyclicwindow[(firstindex+offset) % kmersize]))
  {
    offset++;
  }
  if (offset < windowwidth)
  {
    return offset;
  }
  return kmersize;
}
/*
  Allocate a GtBareEncseq for the <len> characters of <sequence> over an
  alphabet of <numofchars> symbols. One pass over the sequence
  - counts the characters satisfying ISSPECIAL,
  - counts the occurrences of every other character in charcount (each
    such character must be smaller than <numofchars>),
  - records every maximal run of special characters with its start and
    length in the specialranges array.
  The <sequence> pointer itself is stored in the result, not a copy.
*/
GtBareEncseq *gt_bare_encseq_new(GtUchar *sequence,GtUword len,
                                 GtUword numofchars)
{
  GtBareEncseq *bare_encseq = gt_malloc(sizeof *bare_encseq);
  GtBareSpecialrange *currentrange = NULL;
  GtUword pos, runlength = 0;

  bare_encseq->specialcharacters = 0;
  bare_encseq->numofchars = numofchars;
  bare_encseq->charcount = gt_calloc((size_t) bare_encseq->numofchars,
                                     sizeof *bare_encseq->charcount);
  GT_INITARRAY(&bare_encseq->specialranges,GtBareSpecialrange);
  for (pos = 0; pos < len; pos++)
  {
    const GtUchar cc = sequence[pos];

    if (!ISSPECIAL(cc))
    {
      gt_assert((GtUword) cc < bare_encseq->numofchars);
      bare_encseq->charcount[(int) cc]++;
      if (runlength > 0)
      {
        /* a run of special characters ended just before pos */
        gt_assert(currentrange != NULL);
        currentrange->length = runlength;
        runlength = 0;
      }
      continue;
    }
    if (runlength == 0)
    {
      /* first special character of a new run */
      GT_GETNEXTFREEINARRAY(currentrange,&bare_encseq->specialranges,
                            GtBareSpecialrange,128UL);
      currentrange->start = pos;
    }
    runlength++;
    bare_encseq->specialcharacters++;
  }
  if (runlength > 0)
  {
    /* the sequence ends with a run of special characters */
    gt_assert(currentrange != NULL);
    currentrange->length = runlength;
  }
  bare_encseq->sequence = sequence;
  bare_encseq->totallength = len;
  return bare_encseq;
}
Exemple #19
0
/*
  Starting with the interval <left>..<right> of <suffixarray> and the given
  <offset>, consume the query characters in [qstart, qend) one by one. For
  each character gt_lcpintervalfindcharchildintv is asked to replace the
  interval by the child interval for that character at the current offset.
  The scan stops when the interval has left > right, the query is
  exhausted, a character satisfying ISSPECIAL is read, or no child interval
  is found. Returns the offset reached, plus 1.
*/
static unsigned long gt_esa2shulengthatposition(const Suffixarray *suffixarray,
                                                unsigned long totallength,
                                                unsigned long offset,
                                                unsigned long left,
                                                unsigned long right,
                                                const GtUchar *qstart,
                                                const GtUchar *qend)
{
  Simplelcpinterval itv;
  const GtUchar *qptr = qstart;

  gt_assert(left < right);
  itv.left = left;
  itv.right = right;
  while (itv.left <= itv.right &&
         qptr < qend &&
         !ISSPECIAL(*qptr) &&
         gt_lcpintervalfindcharchildintv(suffixarray->encseq,
                                         suffixarray->readmode,
                                         totallength,
                                         suffixarray->suftab,
                                         &itv,
                                         *qptr,
                                         offset,
                                         itv.left,
                                         itv.right))
  {
    qptr++;
    offset++;
  }
  return offset+1;
}
/*
  Return true iff the symbol at position <upos> of <useq> equals the symbol
  at position <vpos> of <vseq>. Unless OUTSIDE_OF_GT is defined, the
  symbols are obtained via sequenceobject_get_char, and a symbol of <useq>
  satisfying ISSPECIAL never matches (<vseq> is not accessed in that case).
  With OUTSIDE_OF_GT the symbols are read directly from sequence_ptr and
  compared without any check for special symbols.
*/
static bool sequenceobject_symbol_match(Sequenceobject *useq,
                                        GtUword upos,
                                        Sequenceobject *vseq,
                                        GtUword vpos)
{
#ifdef OUTSIDE_OF_GT
  const GtUchar cu = useq->sequence_ptr[upos];

  return cu == vseq->sequence_ptr[vpos];
#else
  const GtUchar cu = sequenceobject_get_char(useq,upos);

  return !ISSPECIAL(cu) && cu == sequenceobject_get_char(vseq,vpos);
#endif
}
Exemple #21
0
/*
  Fill <bwci> with bounds for the interval <parentleft>..<parentright> of
  <suftab>, looking at the character found <parentoffset> positions after
  the start of each suffix. The walk goes from left to right: the character
  at the current left bound is read, bounds are recorded via the
  ADDPREVIOUSRBOUND / ADDCURRENTLBOUND / ADDCURRENTINCHAR macros, and
  lcpintervalfindrightbound delivers the right bound for that character,
  after which the walk continues at rightbound+1.
  The walk ends
  - with an early return when the character at the left bound satisfies
    ISSPECIAL, or
  - when the character at the left bound equals the character at
    <parentright>; the final bounds are then derived from <parentright>.
  NOTE(review): the ADD... and SEQUENCE macros are defined elsewhere;
  presumably they write to bwci and read with readmode -- verify against
  their definitions before changing local names or statement order.
*/
void gt_lcpintervalsplitwithoutspecial(GtArrayBoundswithchar *bwci,
                                       const GtEncseq *encseq,
                                       GtReadmode readmode,
                                       unsigned long totallength,
                                       const ESASuffixptr *suftab,
                                       unsigned long parentoffset,
                                       unsigned long parentleft,
                                       unsigned long parentright)
{
  GtUchar leftcc, rightcc;
  unsigned long rightbound = 0, leftbound = parentleft;

  /* call gt_lcpintervalextendlcp and verify if interval can be extended by
     some character */
  bwci->nextfreeBoundswithchar = 0;
  /* character of the rightmost suffix; used to detect the last group */
  rightcc = SEQUENCE(encseq,ESASUFFIXPTRGET(suftab,parentright) + parentoffset);
  while (true)
  {
    leftcc = SEQUENCE(encseq,ESASUFFIXPTRGET(suftab,leftbound) + parentoffset);
    gt_assert(bwci->nextfreeBoundswithchar < bwci->allocatedBoundswithchar);
    if (ISSPECIAL(leftcc))
    {
      /* a special character at the left bound ends the walk; the bounds
         recorded here come from the previous group's rightbound */
      ADDPREVIOUSRBOUND(rightbound);
      ADDCURRENTLBOUND(rightbound+1);
      return;
    }
    ADDPREVIOUSRBOUND(leftbound-1);
    ADDCURRENTLBOUND(leftbound);
    ADDCURRENTINCHAR(leftcc);
    if (leftcc == rightcc)
    {
      /* the current character is also the character at parentright */
      break;
    }
    rightbound = lcpintervalfindrightbound(encseq,readmode,totallength,suftab,
                                           leftcc,parentoffset,
                                           leftbound,parentright);
    leftbound = rightbound+1;
  }
  gt_assert(bwci->nextfreeBoundswithchar < bwci->allocatedBoundswithchar);
  ADDPREVIOUSRBOUND(parentright);
  ADDCURRENTLBOUND(parentright+1);
}
/*
  Assertion-only check of a match of length <len>; it does nothing unless
  <readmode> is GT_READMODE_REVERSE. <pos2> is relative to the start of
  sequence <seqnum2> of <encseq> and is converted to an absolute position.
  The check asserts that the character at pos1+offset equals the character
  at pos2+len-1-offset for every offset < len and that none of them is
  special. It then asserts that the character following the first instance
  (SEPARATOR at the end of the sequence) differs from the character
  preceding the second instance (SEPARATOR at position 0), or is special.
*/
static void verifymatch(const GtEncseq *encseq,
                        GtUword len,
                        GtUword pos1,
                        uint64_t seqnum2,
                        GtUword pos2,
                        GtReadmode readmode)
{
  GtUword offset, totallength;
  GtUchar cc1, cc2;

  if (readmode != GT_READMODE_REVERSE)
  {
    return;
  }
  totallength = gt_encseq_total_length(encseq);
  pos2 += gt_encseq_seqstartpos(encseq, seqnum2);
  offset = 0;
  while (offset < len)
  {
    gt_assert(pos1 + len - 1 < totallength);
    gt_assert(pos2 + len - 1 < totallength);
    cc1 = gt_encseq_get_encoded_char(encseq,pos1+offset,GT_READMODE_FORWARD);
    cc2 = gt_encseq_get_encoded_char(encseq,pos2+len-1-offset,
                                     GT_READMODE_FORWARD);
    gt_assert(cc1 == cc2 && ISNOTSPECIAL(cc1));
    offset++;
  }
  /* character to the right of the first instance */
  if (pos1 + len >= totallength)
  {
    cc1 = SEPARATOR;
  } else
  {
    cc1 = gt_encseq_get_encoded_char(encseq,pos1+len,GT_READMODE_FORWARD);
  }
  /* character to the left of the second instance */
  if (pos2 == 0)
  {
    cc2 = SEPARATOR;
  } else
  {
    cc2 = gt_encseq_get_encoded_char(encseq,pos2-1,GT_READMODE_FORWARD);
  }
  gt_assert(cc1 != cc2 || ISSPECIAL(cc1));
}
Exemple #23
0
/*
  Compare the characters found <parentoffset> positions after the start of
  the suffixes at <parentleft> and <parentright> of <suftab>. If both are
  identical and do not satisfy ISSPECIAL, this common character is
  returned; otherwise <alphasize> is returned.
*/
GtUchar gt_lcpintervalextendlcp(const GtEncseq *encseq,
                                GtReadmode readmode,
                                const ESASuffixptr *suftab,
                                unsigned long totallength,
                                GtUchar alphasize,
                                unsigned long parentoffset,
                                unsigned long parentleft,
                                unsigned long parentright)
{
  const GtUchar leftchar
    = SEQUENCE(encseq,ESASUFFIXPTRGET(suftab,parentleft) + parentoffset);
  const GtUchar rightchar
    = SEQUENCE(encseq,ESASUFFIXPTRGET(suftab,parentright) + parentoffset);

  if (leftchar == rightchar && !ISSPECIAL(leftchar))
  {
    gt_assert(leftchar < alphasize);
    return leftchar;
  }
  return alphasize;
}
/*
  Scan the first <windowwidth> characters of <cyclicwindow>, read in cyclic
  order (modulo <kmersize>) starting at <firstindex>. If a character
  satisfying ISSPECIAL is found, its offset is stored in <firstspecialpos>
  and true is returned. Otherwise <firstspecialpos> is set to 0 and false
  is returned.
*/
static bool determinefirstspecialposition(unsigned int *firstspecialpos,
                                          unsigned int windowwidth,
                                          unsigned int kmersize,
                                          const GtUchar *cyclicwindow,
                                          unsigned int firstindex)
{
  unsigned int offset = 0;

  while (offset < windowwidth)
  {
    if (ISSPECIAL(cyclicwindow[(firstindex+offset) % kmersize]))
    {
      break;
    }
    offset++;
  }
  if (offset < windowwidth)
  {
    *firstspecialpos = offset;
    return true;
  }
  *firstspecialpos = 0; /* defined value also in the not-found case */
  return false;
}
Exemple #25
0
/*
  Return true iff a match starting at <dbstart> in <dbencseq> and at
  <queryoffset> in <query> cannot be extended to the left: either one of
  the two start positions is 0, or the database character at dbstart-1
  (read in <readmode>) satisfies ISSPECIAL or differs from
  query[queryoffset-1].
*/
static bool gt_mum_isleftmaximal(const GtEncseq *dbencseq,
                                 GtReadmode readmode,
                                 GtUword dbstart,
                                 GtUword queryoffset,
                                 const GtUchar *query)
{
  if (dbstart > 0 && queryoffset > 0)
  {
    const GtUchar dbleftchar
      = gt_encseq_get_encoded_char(dbencseq, /* Random access */
                                   dbstart-1,
                                   readmode);

    if (!ISSPECIAL(dbleftchar) && dbleftchar == query[queryoffset-1])
    {
      return false;
    }
  }
  return true;
}
Exemple #26
0
/*
  Length of the longest common prefix of the range start1..end1 of
  <encseq1> (read in <readmode1>) and the range start2..end2 of <encseq2>
  (read in <readmode2>); both end positions are inclusive. The comparison
  stops at the first differing pair of characters or at the first pair
  whose character from <encseq1> satisfies ISSPECIAL.
*/
static GtUword getlcp(const GtEncseq *encseq1,
                            GtReadmode readmode1,
                            GtUword start1, GtUword end1,
                            const GtEncseq *encseq2,
                            GtReadmode readmode2,
                            GtUword start2, GtUword end2)
{
  GtUword p1 = start1, p2 = start2;

  while (p1 <= end1 && p2 <= end2)
  {
    const GtUchar cc1 = gt_encseq_get_encoded_char(encseq1,p1,readmode1);

    if (cc1 != gt_encseq_get_encoded_char(encseq2,p2,readmode2)
          || ISSPECIAL(cc1))
    {
      break;
    }
    p1++;
    p2++;
  }
  return p1 - start1;
}
Exemple #27
0
/*
  Starting with the interval <left>..<right> of <genericindex> (accessed
  as a Suffixarray) and the given <offset>, consume the query characters
  in [qstart, qend). While the interval satisfies left < right, the next
  query character is used to replace the interval by the child interval
  delivered by lcpintervalfindcharchildintv, and the offset is incremented.
  Returns the offset reached as soon as the interval no longer satisfies
  left < right. Returns 0 if, before that, the query is exhausted, a
  character satisfying ISSPECIAL is read, or no child interval is found.
  <witnessposition> is unused.
*/
unsigned long suffixarrayuniqueforward (const void *genericindex,
                                        unsigned long offset,
                                        Seqpos left,
                                        Seqpos right,
                                        GT_UNUSED Seqpos *witnessposition,
                                        const GtUchar *qstart,
                                        const GtUchar *qend)
{
    const Suffixarray *suffixarray = (const Suffixarray *) genericindex;
    const GtUchar *qptr = qstart;
    Simplelcpinterval itv;
    Seqpos totallength;

    itv.left = left;
    itv.right = right;
    totallength = getencseqtotallength(suffixarray->encseq);
    while (itv.left < itv.right)
    {
        if (qptr >= qend || ISSPECIAL(*qptr) ||
                !lcpintervalfindcharchildintv(suffixarray->encseq,
                                              suffixarray->readmode,
                                              totallength,
                                              suffixarray->suftab,
                                              &itv,
                                              *qptr,
                                              (Seqpos) offset,
                                              itv.left,
                                              itv.right))
        {
            return 0;
        }
        qptr++;
        offset++;
    }
    return offset;
}
Exemple #28
0
/*
  Return true iff a match starting at <dbstart> in <dbencseq> and at
  currentoffset of <querysubstring> cannot be extended to the left: either
  one of the two start positions is 0, or the database character at
  dbstart-1 (read in <readmode>) satisfies ISSPECIAL or differs from the
  query character at currentoffset-1.
*/
static bool gt_mmsearch_isleftmaximal(const GtEncseq *dbencseq,
                                      GtReadmode readmode,
                                      GtUword dbstart,
                                      const GtQuerysubstring *querysubstring)
{
  if (dbstart > 0 && querysubstring->currentoffset > 0)
  {
    const GtUchar dbleftchar
      = gt_encseq_get_encoded_char(dbencseq, /* Random access */
                                   dbstart-1,
                                   readmode);

    if (!ISSPECIAL(dbleftchar) &&
        dbleftchar == gt_mmsearch_accessquery(querysubstring->queryrep,
                                              querysubstring->currentoffset-1))
    {
      return false;
    }
  }
  return true;
}
Exemple #29
0
/*
  Starting with the interval <left>..<right> of <genericindex> (accessed
  as a Suffixarray) and the given <offset>, consume the query characters
  in [qstart, qend). Each character is used to replace the interval by the
  child interval delivered by lcpintervalfindcharchildintv, and the offset
  is incremented. The scan stops when the query is exhausted, a character
  satisfying ISSPECIAL is read, or no child interval is found.
  If <witnessposition> is not NULL, it receives the suftab entry at the
  left bound of the interval at that point. Returns the offset reached.
*/
unsigned long suffixarraymstats (const void *genericindex,
                                 unsigned long offset,
                                 Seqpos left,
                                 Seqpos right,
                                 Seqpos *witnessposition,
                                 const GtUchar *qstart,
                                 const GtUchar *qend)
{
    const Suffixarray *suffixarray = (const Suffixarray *) genericindex;
    const GtUchar *qptr = qstart;
    Simplelcpinterval itv;
    Seqpos totallength;

    itv.left = left;
    itv.right = right;
    totallength = getencseqtotallength(suffixarray->encseq);
    for (;;)
    {
        gt_assert(itv.left <= itv.right);
        if (qptr < qend && !ISSPECIAL(*qptr) &&
                lcpintervalfindcharchildintv(suffixarray->encseq,
                                             suffixarray->readmode,
                                             totallength,
                                             suffixarray->suftab,
                                             &itv,
                                             *qptr,
                                             (Seqpos) offset,
                                             itv.left,itv.right))
        {
            qptr++;
            offset++;
            continue;
        }
        break;
    }
    if (witnessposition != NULL)
    {
        *witnessposition = suffixarray->suftab[itv.left];
    }
    return offset;
}
Exemple #30
0
void gt_lookaheadsearchPSSM(const GtEncseq *encseq,
                            const Profilematrix *prof)
{
  unsigned long firstpos, bufsize;
  GtUchar currentchar;
  unsigned long pos;
  GtEncseqReader *esr;
  unsigned long totallength = gt_encseq_total_length(encseq);
  GtUchar *buffer;

  esr = gt_encseq_create_reader_with_readmode(encseq,GT_READMODE_FORWARD,0);
  buffer = gt_malloc(sizeof *buffer * prof->dimension);
  firstpos = bufsize = 0;
  for (pos=0; pos < totallength; pos++)
  {
    currentchar = gt_encseq_reader_next_encoded_char(esr);
    if (ISSPECIAL(currentchar))
    {
      bufsize = firstpos = 0;
    } else
    {
      if (bufsize < prof->dimension)
      {
        buffer[bufsize++] = currentchar;
      } else
      {
        buffer[firstpos++] = currentchar;
        if (firstpos == prof->dimension)
        {
          firstpos = 0;
        }
      }
    }
  }
  gt_encseq_reader_delete(esr);
  gt_free(buffer);
}