Esempio n. 1
0
/*
 * Build a table with one counter per symbol code in [0, numofchars).
 *
 * For every range delivered by the special range iterator one neighbouring
 * position is inspected: range.rightpos for reverse read directions (read in
 * Forwardmode if readmode is Reversemode, in Complementmode otherwise) and
 * range.leftpos-1 for the other directions (read in readmode). If the
 * character found there is not special, its counter is incremented.
 * Finally, if the sequence has no special suffix, the character at position
 * totallength-1 is counted as well.
 *
 * Returns the table, allocated with gt_malloc.
 * NOTE(review): the table is not released here; presumably the caller owns
 * it -- confirm.
 * NOTE(review): the final step reads position totallength-1 in readmode for
 * every read direction; confirm that this is intended for reverse modes.
 */
static Seqpos *leftcontextofspecialchardist(unsigned int numofchars,
                                            const Encodedsequence *encseq,
                                            Readmode readmode)
{
  const Seqpos totallength = getencseqtotallength(encseq);
  Seqpos *dist = gt_malloc(sizeof(*dist) * numofchars);
  unsigned int charidx = 0;
  GtUchar neighbour;

  while (charidx < numofchars)
  {
    dist[charidx++] = 0;
  }
  if (hasspecialranges(encseq))
  {
    Sequencerange range;
    Specialrangeiterator *sri = newspecialrangeiterator(encseq,true);

    if (!ISDIRREVERSE(readmode))
    {
      while (nextspecialrangeiterator(&range,sri))
      {
        gt_assert(range.leftpos < totallength);
        if (range.leftpos > 0)
        {
          neighbour = getencodedchar(encseq,range.leftpos-1,readmode);
          if (ISNOTSPECIAL(neighbour))
          {
            dist[neighbour]++;
          }
        }
      }
    } else
    {
      const Readmode thismode = (readmode == Reversemode) ? Forwardmode
                                                          : Complementmode;

      while (nextspecialrangeiterator(&range,sri))
      {
        if (range.rightpos < totallength)
        {
          neighbour = getencodedchar(encseq,range.rightpos,thismode);
          if (ISNOTSPECIAL(neighbour))
          {
            dist[neighbour]++;
          }
        }
      }
    }
    freespecialrangeiterator(&sri);
  }
  if (getencseqlengthofspecialsuffix(encseq) == 0)
  {
    /* no special suffix: the last character is counted, too */
    neighbour = getencodedchar(encseq,totallength-1,readmode);
    gt_assert(ISNOTSPECIAL(neighbour));
    dist[neighbour]++;
  }
  return dist;
}
Esempio n. 2
0
/*
 * Walk the suffix sort space downwards from idx while idx is greater than
 * targetoffset[source]. For each suffix start position found, the character
 * to its left is read; if that character is not special and its superbucket
 * is not marked as sorted, the position of that left character is stored at
 * targetoffset[cc] and targetoffset[cc] is decremented.
 *
 * targetoffset[source] is re-read in every iteration because the loop body
 * may decrement it (when the left character equals source).
 */
static void backwardderive(const GtBucketspec2 *bucketspec2,
                           GtSuffixsortspace *suffixsortspace,
                           GtUword *targetoffset,
                           unsigned int source,
                           GtUword idx)
{
  /* Both sides are shifted by one; presumably this keeps the comparison
     meaningful when an offset has been decremented below zero and wrapped
     around -- TODO confirm that GtUword is unsigned. */
  while (idx + 1 > targetoffset[source] + 1)
  {
    const GtUword suffixstart
      = gt_suffixsortspace_getdirect(suffixsortspace,idx);

    idx--;
    if (suffixstart > 0)
    {
      const GtUchar leftchar
        = gt_encseq_get_encoded_char(bucketspec2->encseq,
                                     suffixstart-1,
                                     bucketspec2->readmode);

      if (ISNOTSPECIAL(leftchar) &&
          !bucketspec2->superbuckettab[leftchar].sorted)
      {
        gt_suffixsortspace_setdirect(suffixsortspace,targetoffset[leftchar],
                                     suffixstart - 1);
        targetoffset[leftchar]--;
      }
    }
  }
}
Esempio n. 3
0
/*
 * Compute a code from ecp->filltable[prefixindex] plus the contributions
 * ecp->multimappower[i][cc] of the prefixindex characters read at positions
 * pos, pos+1, ..., pos+prefixindex-1 (none of which may be special).
 *
 * The accumulation stops as soon as the running value exceeds stopcode; in
 * that case false is returned and *code is left untouched. Otherwise the
 * final value is stored in *code and true is returned.
 *
 * Requires 0 < prefixindex < ecp->prefixlength (checked by assertion).
 */
bool gt_Enumcodeatposition_filledqgramcodestopatmax(
                                        GtCodetype *code,
                                        const Enumcodeatposition *ecp,
                                        unsigned int prefixindex,
                                        unsigned long pos,
                                        GtCodetype stopcode)
{
  GtCodetype partial;
  unsigned int offset;

  gt_assert(prefixindex > 0 && prefixindex < ecp->prefixlength);
  partial = ecp->filltable[prefixindex];
  for (offset = 0; partial <= stopcode && offset < prefixindex; offset++)
  {
    GtUchar cc;

    gt_assert((unsigned long) (pos + offset) < ecp->totallength);
    cc = gt_encseq_get_encoded_char_nospecial(ecp->encseq,
                                              pos + offset,
                                              ecp->readmode);
    gt_assert(ISNOTSPECIAL(cc));
    partial += ecp->multimappower[offset][cc];
  }
  if (partial > stopcode)
  {
    return false;
  }
  *code = partial;
  return true;
}
Esempio n. 4
0
/*
 * Starting with a substring of the given length at startpos, extend it one
 * character at a time to the right. For every length between minmersize and
 * maxmersize reached this way, add 1 to the distribution entry of that
 * length. The extension ends when the length exceeds maxmersize, the end of
 * the sequence (totallength) is reached, or the character at the current
 * end position is special.
 *
 * Nothing is done if length is already larger than maxmersize.
 */
static void iteritvdistribution(GtArrayuint64_t *distribution,
                                const GtEncseq *encseq,
                                GtReadmode readmode,
                                unsigned long totallength,
                                unsigned long minmersize,
                                unsigned long maxmersize,
                                unsigned long length,
                                unsigned long startpos)
{
    unsigned long merlen, endpos;

    if (length > maxmersize)
    {
        return;
    }
    merlen = length;
    endpos = startpos + length - 1; /* last position of the substring */
    while (merlen <= maxmersize &&
            endpos < totallength &&
            ISNOTSPECIAL(gt_encseq_get_encoded_char(encseq,endpos,readmode)))
    {
        if (merlen >= minmersize)
        {
            adddistributionuint64_t(distribution,merlen,1UL);
        }
        endpos++;
        merlen++;
    }
}
Esempio n. 5
0
/*
 * Scan the suffix positions from idx upwards until targetptr[source] is
 * reached. For each suffix start position > 0 the character to its left is
 * read; if it is not special and its superbucket is not marked as sorted,
 * the position of that left character is written to *targetptr[cc] and
 * targetptr[cc] is advanced by one.
 *
 * targetptr[source] is re-read in every iteration because the loop body may
 * advance it. On entry idx must be smaller than targetptr[source].
 */
static void forwardderive(const GtBucketspec2 *bucketspec2,
                          Seqpos **targetptr,
                          unsigned int source,
                          Seqpos *idx)
{
  gt_assert (idx < targetptr[source]);
  while (idx < targetptr[source])
  {
    const Seqpos suffixstart = *idx;

    idx++;
    if (suffixstart > 0)
    {
      const GtUchar leftchar = getencodedchar(bucketspec2->encseq,
                                              suffixstart-1,
                                              bucketspec2->readmode);

      if (ISNOTSPECIAL(leftchar) &&
          !bucketspec2->superbuckettab[leftchar].sorted)
      {
        *targetptr[leftchar] = suffixstart - 1;
        targetptr[leftchar]++;
      }
    }
  }
}
Esempio n. 6
0
/*
 * Update the k-mer stream state spwp for one incoming character.
 *
 * Step 1 (only if doshift is set and the queue spwp->spos is not empty):
 * the head of the queue is aged. While its distvalue is positive, lchar is
 * shifted out of its codeforleftcontext and distvalue is decremented;
 * otherwise the head is removed and, if another element follows, the
 * distvalue of that new head is decremented.
 *
 * Step 2: a non-special charcode is added to codewithoutspecial, either by
 * shifting out lchar and appending charcode (window already holds kmersize
 * characters) or by adding its contribution at index lengthwithoutspecial.
 * A special charcode appends a new element to the queue, which records the
 * code accumulated so far, and resets the accumulated code and length to 0.
 */
static void updatespecialpositions(Kmerstream *spwp,
                                   GtUchar charcode,
                                   bool doshift,
                                   GtUchar lchar)
{
  Specialitem newelem;

  if (doshift && !specialqueueisempty(&spwp->spos))
  {
    /* this part only updates or removes queue elements */
    Specialitem *oldest = specialheadofqueue(&spwp->spos);

    if (oldest->distvalue > 0)
    {
      SUBTRACTLCHARANDSHIFT(oldest->codeforleftcontext,lchar,spwp->numofchars,
                            spwp->multimappower[0]);
      oldest->distvalue--;
    } else
    {
      specialdeleteheadofqueue(&spwp->spos);
      if (!specialqueueisempty(&spwp->spos))
      {
        oldest = specialheadofqueue(&spwp->spos);
        oldest->distvalue--;
      }
    }
  }
  if (ISNOTSPECIAL(charcode))
  {
    if (spwp->lengthwithoutspecial != spwp->kmersize)
    {
      /* window not yet full: add the contribution of the new character */
      spwp->codewithoutspecial +=
        spwp->multimappower[spwp->lengthwithoutspecial][charcode];
      spwp->lengthwithoutspecial++;
    } else
    {
      SUBTRACTLCHARSHIFTADDNEXT(spwp->codewithoutspecial,
                                lchar,
                                spwp->numofchars,
                                spwp->multimappower[0],
                                charcode);
    }
    return;
  }
  /* special character: this is the only place where the queue grows */
  if (!specialqueueisempty(&spwp->spos))
  {
    newelem.distvalue = spwp->lengthwithoutspecial+1U;
  } else
  {
    newelem.distvalue = spwp->windowwidth-1U;
  }
  if (spwp->lengthwithoutspecial == spwp->kmersize)
  {
    SUBTRACTLCHARANDSHIFT(spwp->codewithoutspecial,lchar,
                          spwp->numofchars,spwp->multimappower[0]);
  }
  newelem.codeforleftcontext = spwp->codewithoutspecial;
  specialenqueue(&spwp->spos, newelem);
  spwp->lengthwithoutspecial = 0;
  spwp->codewithoutspecial = 0;
}
/*
 * Update the k-mer stream state spwp for one incoming character.
 *
 * Step 1 (only if doshift is set and the special queue is not empty): while
 * the head's lengthofleftcontext is positive, leftchar is shifted out of
 * its codeofleftcontext and the length is decremented; otherwise the head
 * is removed from the queue.
 *
 * Step 2: a non-special charcode is added to codewithoutspecial, either by
 * shifting out leftchar and appending charcode (window already holds
 * kmersize characters) or by adding its contribution at index
 * lengthwithoutspecial. A special charcode enqueues the code accumulated so
 * far together with a left-context length and resets the accumulated code
 * and length to 0.
 */
static void kmerstream_updatespecialpositions(GtKmerstream *spwp,
                                              GtUchar charcode,
                                              bool doshift,
                                              GtUchar leftchar)
{
  unsigned int contextlength;

  if (doshift && !special_queue_is_empty(&spwp->specialqueue))
  {
    GtSpecialcontext *oldest = special_queue_head_get(&spwp->specialqueue);

    if (oldest->lengthofleftcontext > 0)
    {
      SUBTRACTLCHARANDSHIFT(oldest->codeofleftcontext,leftchar,
                            spwp->numofchars,
                            spwp->multimappower[0]);
      oldest->lengthofleftcontext--;
    } else
    {
      special_queueu_head_delete(&spwp->specialqueue);
    }
  }
  if (ISNOTSPECIAL(charcode))
  {
    if (spwp->lengthwithoutspecial != spwp->kmersize)
    {
      /* window not yet full: add the contribution of the new character */
      spwp->codewithoutspecial
        += spwp->multimappower[spwp->lengthwithoutspecial][charcode];
      spwp->lengthwithoutspecial++;
    } else
    {
      SUBTRACTLCHARSHIFTADDNEXT(spwp->codewithoutspecial,
                                leftchar,
                                spwp->numofchars,
                                spwp->multimappower[0],
                                charcode);
    }
    return;
  }
  /* special character: this is the only place where the queue grows */
  if (special_queue_is_empty(&spwp->specialqueue))
  {
    contextlength = spwp->windowwidth - 1U;
  } else
  {
    contextlength = spwp->lengthwithoutspecial;
  }
  if (spwp->lengthwithoutspecial == spwp->kmersize)
  {
    SUBTRACTLCHARANDSHIFT(spwp->codewithoutspecial,
                          leftchar,
                          spwp->numofchars,
                          spwp->multimappower[0]);
  }
  gt_assert(contextlength < spwp->kmersize);
  special_queue_enqueue(&spwp->specialqueue,
                        contextlength,
                        spwp->codewithoutspecial);
  spwp->lengthwithoutspecial = 0;
  spwp->codewithoutspecial = 0;
}
Esempio n. 8
0
/*
 * Map an encoded sequence symbol to a wavelet tree symbol.
 *
 * Non-special symbols map to themselves. The special symbols are mapped to
 * the three values just below alpha_size:
 *   SEPARATOR -> alpha_size - 1
 *   WILDCARD  -> alpha_size - 2
 *   UNDEFCHAR -> alpha_size - 3
 * Any other special symbol violates the assertion.
 */
static inline GtWtreeSymbol gt_wtree_encseq_map(GtWtreeEncseq *wtree_encseq,
                                                GtUchar symbol)
{
  if (!ISNOTSPECIAL(symbol))
  {
    if (symbol == (GtUchar) SEPARATOR)
    {
      return (GtWtreeSymbol) wtree_encseq->alpha_size - 1;
    }
    if (symbol == (GtUchar) WILDCARD)
    {
      return (GtWtreeSymbol) wtree_encseq->alpha_size - 2;
    }
    gt_assert(symbol == (GtUchar) UNDEFCHAR);
    return (GtWtreeSymbol) wtree_encseq->alpha_size - 3;
  }
  return (GtWtreeSymbol) symbol;
}
Esempio n. 9
0
/*
 * Compute the score of an alignment.
 *
 * The edit operations are processed from the last list entry to the first.
 * Every aligned column (Match, Mismatch, Replacement) contributes matchscore
 * if both symbols are equal and not special, and mismatchscore otherwise.
 * Every Deletion or Insertion contributes gapscore per step.
 *
 * Returns the sum of all contributions.
 * NOTE(review): gapscore * meop->steps multiplies a GtWord by the step
 * count; if steps is an unsigned type the product is computed in unsigned
 * arithmetic -- confirm this is intended for negative gap scores.
 */
GtWord gt_alignment_eval_with_score(const GtAlignment *alignment,
                                  GtWord matchscore,
                                  GtWord mismatchscore,
                                  GtWord gapscore)
{
  GtUword remaining, step, upos = 0, vpos = 0;
  GtWord total = 0;
  GtMultieop *meop;

  gt_assert(alignment != NULL);
  gt_assert(gt_alignment_is_valid(alignment));

  remaining = gt_multieoplist_get_length(alignment->eops);
  while (remaining > 0)
  {
    meop = gt_multieoplist_get_entry(alignment->eops, remaining - 1);
    remaining--;
    switch (meop->type)
    {
      case Deletion:
        /* only u advances */
        total += gapscore * meop->steps;
        upos += meop->steps;
        break;
      case Insertion:
        /* only v advances */
        total += gapscore * meop->steps;
        vpos += meop->steps;
        break;
      case Mismatch:
      case Match:
      case Replacement:
        for (step = 0; step < meop->steps; step++, upos++, vpos++)
        {
          total += (alignment->u[upos] == alignment->v[vpos] &&
                    ISNOTSPECIAL(alignment->u[upos])) ? matchscore
                                                      : mismatchscore;
        }
        break;
    }
  }
  return total;
}
Esempio n. 10
0
void gt_fprintfencseq(FILE *fpout,
                      const GtEncseq *encseq,
                      unsigned long start,
                      unsigned long wlen)
{
    unsigned long idx;
    GtUchar currentchar;
    const GtAlphabet *alpha;

    alpha = gt_encseq_alphabet(encseq);
    for (idx = start; idx < start + wlen; idx++)
    {
        currentchar = gt_encseq_get_encoded_char(encseq,
                      idx,
                      GT_READMODE_FORWARD);
        gt_assert(ISNOTSPECIAL(currentchar));
        gt_alphabet_echo_pretty_symbol(alpha,fpout,currentchar);
    }
}
Esempio n. 11
0
/*
 * Convert the sequence stored in bare_encseq in place.
 *
 *   forward, !direct:   every non-special symbol is replaced by its
 *                       complement; positions are unchanged.
 *   !forward, direct:   the sequence is reversed.
 *   !forward, !direct:  the sequence is reversed and every non-special
 *                       symbol is replaced by its complement.
 *
 * The combination forward && direct is rejected by an assertion.
 * An empty sequence (totallength == 0) is left untouched.
 */
void bare_encseq_convert(GtBareEncseq *bare_encseq,bool forward,bool direct)
{
  GtUchar *leftptr, *rightptr;

  if (forward)
  {
    gt_assert(!direct);
    for (leftptr = bare_encseq->sequence;
         leftptr < bare_encseq->sequence + bare_encseq->totallength;
         leftptr++)
    {
      if (ISNOTSPECIAL(*leftptr))
      {
        *leftptr = GT_COMPLEMENTBASE(*leftptr);
      }
    }
    return;
  }
  if (bare_encseq->totallength == 0)
  {
    /* sequence + totallength - 1 would point before the start of the array,
       which is undefined even if the pointer is never dereferenced */
    return;
  }
  leftptr = bare_encseq->sequence;
  rightptr = bare_encseq->sequence + bare_encseq->totallength - 1;
  if (direct)
  {
    for (; leftptr < rightptr; leftptr++, rightptr--)
    {
      GtUchar tmp = *leftptr;
      *leftptr = *rightptr;
      *rightptr = tmp;
    }
  } else
  {
    /* Swap and complement the outer pairs. The loop stops before the two
       pointers cross, so rightptr never moves below the first element. */
    for (; leftptr < rightptr; leftptr++, rightptr--)
    {
      GtUchar tmp = *leftptr;
      *leftptr = ISSPECIAL(*rightptr) ? *rightptr
                                      : GT_COMPLEMENTBASE(*rightptr);
      *rightptr = ISSPECIAL(tmp) ? tmp
                                 : GT_COMPLEMENTBASE(tmp);
    }
    /* odd length: the middle symbol stays in place and is complemented */
    if (leftptr == rightptr && !ISSPECIAL(*leftptr))
    {
      *leftptr = GT_COMPLEMENTBASE(*leftptr);
    }
  }
}
Esempio n. 12
0
/*
 * Check a reverse match by assertions; nothing is done for read modes other
 * than GT_READMODE_REVERSE.
 *
 * pos2 is given relative to the start of sequence seqnum2 and is converted
 * to an absolute position first. The len characters starting at pos1 must
 * equal, read in opposite order, the len characters starting at pos2, and
 * all of them must be non-special. In addition the character following the
 * first instance and the character preceding the second instance must
 * differ or be special; a missing neighbour (sequence boundary) is treated
 * as SEPARATOR.
 */
static void verifymatch(const GtEncseq *encseq,
                        GtUword len,
                        GtUword pos1,
                        uint64_t seqnum2,
                        GtUword pos2,
                        GtReadmode readmode)
{
  GtUword offset, totallength;
  GtUchar rightofpos1, leftofpos2;

  if (readmode != GT_READMODE_REVERSE)
  {
    return;
  }
  totallength = gt_encseq_total_length(encseq);
  pos2 += gt_encseq_seqstartpos(encseq, seqnum2);
  for (offset = 0; offset < len; offset++)
  {
    GtUchar cc1, cc2;

    gt_assert(pos1 + len - 1 < totallength);
    gt_assert(pos2 + len - 1 < totallength);
    cc1 = gt_encseq_get_encoded_char(encseq,pos1+offset,GT_READMODE_FORWARD);
    cc2 = gt_encseq_get_encoded_char(encseq,pos2+len-1-offset,
                                     GT_READMODE_FORWARD);
    gt_assert(cc1 == cc2 && ISNOTSPECIAL(cc1));
  }
  if (pos1 + len >= totallength)
  {
    rightofpos1 = SEPARATOR;
  } else
  {
    rightofpos1 = gt_encseq_get_encoded_char(encseq,pos1+len,
                                             GT_READMODE_FORWARD);
  }
  if (pos2 > 0)
  {
    leftofpos2 = gt_encseq_get_encoded_char(encseq,pos2-1,
                                            GT_READMODE_FORWARD);
  } else
  {
    leftofpos2 = SEPARATOR;
  }
  gt_assert(rightofpos1 != leftofpos2 || ISSPECIAL(rightofpos1));
}
Esempio n. 13
0
/*
 * Compute a code from ecp->filltable[prefixindex] plus the contributions
 * ecp->multimappower[i][cc] of the prefixindex characters read at positions
 * pos, pos+1, ..., pos+prefixindex-1 (none of which may be special).
 *
 * Requires 0 < prefixindex < ecp->prefixlength (checked by assertion).
 * Returns the resulting code.
 */
GtCodetype gt_Enumcodeatposition_filledqgramcode(const Enumcodeatposition *ecp,
                                                 unsigned int prefixindex,
                                                 unsigned long pos)
{
  GtCodetype result;
  unsigned int offset = 0;

  gt_assert(prefixindex > 0 && prefixindex < ecp->prefixlength);
  result = ecp->filltable[prefixindex];
  while (offset < prefixindex)
  {
    GtUchar cc;

    gt_assert((unsigned long) (pos + offset) < ecp->totallength);
    cc = gt_encseq_get_encoded_char_nospecial(ecp->encseq,
                                              pos + offset,
                                              ecp->readmode);
    gt_assert(ISNOTSPECIAL(cc));
    result += ecp->multimappower[offset][cc];
    offset++;
  }
  return result;
}
Esempio n. 14
0
/*
 * Scan the suffix positions from idx downwards until targetptr[source] is
 * reached. For each suffix start position > 0 the character to its left is
 * read; if it is not special and its superbucket is not marked as sorted,
 * the position of that left character is written to *targetptr[cc] and
 * targetptr[cc] is moved back by one.
 *
 * targetptr[source] is re-read in every iteration because the loop body may
 * move it. On entry idx must be greater than targetptr[source].
 */
static void backwardderive(const GtBucketspec2 *bucketspec2,
                           Seqpos **targetptr,
                           unsigned int source,
                           Seqpos *idx)
{
  gt_assert (idx > targetptr[source]);
  while (idx > targetptr[source])
  {
    const Seqpos suffixstart = *idx;

    idx--;
    if (suffixstart > 0)
    {
      const GtUchar leftchar = getencodedchar(bucketspec2->encseq,
                                              suffixstart-1,
                                              bucketspec2->readmode);

      if (ISNOTSPECIAL(leftchar) &&
          !bucketspec2->superbuckettab[leftchar].sorted)
      {
        *targetptr[leftchar] = suffixstart - 1;
        targetptr[leftchar]--;
      }
    }
  }
}
Esempio n. 15
0
/*
 * Build a table with one counter per symbol code in [0, numofchars).
 *
 * For every range delivered by the special range iterator one neighbouring
 * position is inspected: range.end for reverse read directions and
 * range.start-1 for the other directions. If the character found there is
 * not special, its counter is incremented.
 * In addition one boundary character is counted: position 0 if the read
 * direction is reverse and the sequence has no special prefix, position
 * totallength-1 if the read direction is not reverse and the sequence has
 * no special suffix.
 *
 * In the reverse cases characters are read in GT_READMODE_FORWARD when
 * readmode is GT_READMODE_REVERSE and in GT_READMODE_COMPL otherwise; in
 * the other cases they are read in readmode itself.
 *
 * Returns the table, allocated with gt_malloc.
 * NOTE(review): the table is not released here; presumably the caller owns
 * it -- confirm.
 */
static GtUword *leftcontextofspecialchardist(unsigned int numofchars,
                                                   const GtEncseq *encseq,
                                                   GtReadmode readmode)
{
  const GtUword totallength = gt_encseq_total_length(encseq);
  const GtReadmode reversereadmode = (readmode == GT_READMODE_REVERSE)
                                       ? GT_READMODE_FORWARD
                                       : GT_READMODE_COMPL;
  GtUword *dist = gt_malloc(sizeof (*dist) * numofchars);
  unsigned int charidx = 0;
  GtUchar neighbour;

  while (charidx < numofchars)
  {
    dist[charidx++] = 0;
  }
  if (gt_encseq_has_specialranges(encseq))
  {
    GtRange range;
    GtSpecialrangeiterator *sri = gt_specialrangeiterator_new(encseq,true);

    if (!GT_ISDIRREVERSE(readmode))
    {
      while (gt_specialrangeiterator_next(sri,&range))
      {
        if (range.start > 0)
        {
          neighbour = gt_encseq_get_encoded_char(encseq,range.start-1,
                                                 readmode);
          if (ISNOTSPECIAL(neighbour))
          {
            dist[neighbour]++;
          }
        }
      }
    } else
    {
      while (gt_specialrangeiterator_next(sri,&range))
      {
        if (range.end < totallength)
        {
          neighbour = gt_encseq_get_encoded_char(encseq,range.end,
                                                 reversereadmode);
          if (ISNOTSPECIAL(neighbour))
          {
            dist[neighbour]++;
          }
        }
      }
    }
    gt_specialrangeiterator_delete(sri);
  }
  if (!GT_ISDIRREVERSE(readmode))
  {
    if (gt_encseq_lengthofspecialsuffix(encseq) == 0)
    {
      /* no special suffix: count the last character */
      neighbour = gt_encseq_get_encoded_char(encseq,totallength-1,readmode);
      gt_assert(ISNOTSPECIAL(neighbour));
      dist[neighbour]++;
    }
  } else if (gt_encseq_lengthofspecialprefix(encseq) == 0)
  {
    /* no special prefix: count the first character */
    neighbour = gt_encseq_get_encoded_char(encseq,0,reversereadmode);
    gt_assert(ISNOTSPECIAL(neighbour));
    dist[neighbour]++;
  }
  return dist;
}
Esempio n. 16
0
/*
 * Print an alignment as three lines to fp.
 *
 *   line 1: the symbols of alignment->u, with GAPSYMBOL for every
 *           insertion step;
 *   line 2: MATCHSYMBOL for columns whose two symbols are equal and not
 *           special, MISMATCHSYMBOL for all other columns (including
 *           deletions and insertions);
 *   line 3: the symbols of alignment->v, with GAPSYMBOL for every
 *           deletion step.
 *
 * Symbols are shown as characters[symbol]; special symbols are shown as
 * wildcardshow. The edit operations are processed from the last list entry
 * to the first for each of the three lines.
 */
void gt_alignment_show_with_mapped_chars(const GtAlignment *alignment,
                                         const GtUchar *characters,
                                         GtUchar wildcardshow,
                                         FILE *fp)
{
  GtUword eopnum, left, upos, vpos, numofeops;
  GtMultieop *meop;

  gt_assert(alignment);
  gt_assert(gt_alignment_is_valid(alignment));
  numofeops = gt_multieoplist_get_length(alignment->eops);

  /* first line: u with gaps */
  upos = 0;
  for (eopnum = numofeops; eopnum > 0; eopnum--)
  {
    meop = gt_multieoplist_get_entry(alignment->eops, eopnum - 1);
    if (meop->type == Insertion)
    {
      for (left = meop->steps; left > 0; left--)
      {
        gt_xfputc(GAPSYMBOL, fp);
      }
    } else if (meop->type == Mismatch || meop->type == Match ||
               meop->type == Replacement || meop->type == Deletion)
    {
      for (left = meop->steps; left > 0; left--, upos++)
      {
        gt_xfputc(ISSPECIAL(alignment->u[upos]) ?
                  (int) wildcardshow :
                  (int) characters[alignment->u[upos]], fp);
      }
    }
  }
  gt_xfputc('\n', fp);

  /* middle line: match/mismatch markers */
  upos = 0;
  vpos = 0;
  for (eopnum = numofeops; eopnum > 0; eopnum--)
  {
    meop = gt_multieoplist_get_entry(alignment->eops, eopnum - 1);
    if (meop->type == Deletion)
    {
      for (left = meop->steps; left > 0; left--, upos++)
      {
        gt_xfputc(MISMATCHSYMBOL, fp);
      }
    } else if (meop->type == Insertion)
    {
      for (left = meop->steps; left > 0; left--, vpos++)
      {
        gt_xfputc(MISMATCHSYMBOL, fp);
      }
    } else if (meop->type == Mismatch || meop->type == Match ||
               meop->type == Replacement)
    {
      for (left = meop->steps; left > 0; left--, upos++, vpos++)
      {
        if (alignment->u[upos] != alignment->v[vpos] ||
            !ISNOTSPECIAL(alignment->u[upos]))
        {
          gt_xfputc(MISMATCHSYMBOL, fp);
        } else
        {
          gt_xfputc(MATCHSYMBOL, fp);
        }
      }
    }
  }
  gt_xfputc('\n', fp);

  /* last line: v with gaps */
  vpos = 0;
  for (eopnum = numofeops; eopnum > 0; eopnum--)
  {
    meop = gt_multieoplist_get_entry(alignment->eops, eopnum - 1);
    if (meop->type == Deletion)
    {
      for (left = meop->steps; left > 0; left--)
      {
        gt_xfputc(GAPSYMBOL, fp);
      }
    } else if (meop->type == Mismatch || meop->type == Match ||
               meop->type == Replacement || meop->type == Insertion)
    {
      for (left = meop->steps; left > 0; left--, vpos++)
      {
        gt_xfputc(ISSPECIAL(alignment->v[vpos]) ?
                  (int) wildcardshow :
                  (int) characters[alignment->v[vpos]], fp);
      }
    }
  }
  gt_xfputc('\n', fp);
}
Esempio n. 17
0
static inline void
getMatchBound(const BWTSeq *bwtSeq, const Symbol *query, size_t queryLen,
              struct matchBound *match, bool forward)
{
  const Symbol *qptr, *qend;
  unsigned int cc;
  const Mbtab *mbptr;
  GtPrebwtstate prebwt;

  gt_assert(bwtSeq && query);
  if (forward)
  {
    qptr = query;
    qend = query + queryLen;
  } else
  {
    qptr = query + queryLen - 1;
    qend = query - 1;
  }
  gt_assert(ISNOTSPECIAL(*qptr));
  cc = (unsigned int) *qptr;
  prebwt.mbtab = gt_bwtseq2mbtab((const FMindex *) bwtSeq);
  if (prebwt.mbtab != NULL)
  {
    prebwt.numofchars = gt_bwtseq2numofchars((const FMindex *) bwtSeq);
    prebwt.maxdepth = gt_bwtseq2maxdepth((const FMindex *) bwtSeq);
    prebwt.code = 0;
    prebwt.depth = 0;
    mbptr = gt_prebwt_next(&prebwt,cc);
    match->start = mbptr->lowerbound;
    match->end = mbptr->upperbound;
  } else
  {
    prebwt.numofchars = GT_UNDEF_UINT;
    prebwt.maxdepth = GT_UNDEF_UINT;
    prebwt.code = 0;
    prebwt.depth = GT_UNDEF_UINT;
    match->start = bwtSeq->count[cc];
    match->end   = bwtSeq->count[cc + 1];
  }
  qptr = forward ? (qptr+1) : (qptr-1);
  while (match->start < match->end && qptr != qend)
  {
    GtUwordPair occPair;

    gt_assert(ISNOTSPECIAL(*qptr));
    cc = (unsigned int) *qptr;
    if (prebwt.mbtab != NULL && prebwt.depth < prebwt.maxdepth)
    {
      mbptr = gt_prebwt_next(&prebwt,cc);
      match->start = mbptr->lowerbound;
      match->end = mbptr->upperbound;
    } else
    {
      occPair = BWTSeqTransformedPosPairOcc(bwtSeq, (Symbol) cc, match->start,
                                            match->end);
      match->start = bwtSeq->count[cc] + occPair.a;
      match->end   = bwtSeq->count[cc] + occPair.b;
    }
    qptr = forward ? (qptr+1) : (qptr-1);
  }
}