Beispiel #1
0
/*
  Build a table with one counter per character code in [0,numofchars).
  For every special range of encseq the regular character adjacent to it
  is looked up and its counter is incremented:
    - for reverse read directions the character at range.rightpos is
      inspected (if that position is inside the sequence), read in
      Forwardmode when readmode is Reversemode and in Complementmode
      otherwise;
    - for all other read directions the character at range.leftpos-1 is
      inspected (if range.leftpos > 0), read in readmode.
  Additionally, if the length of the special suffix of encseq is 0, the
  character at position totallength-1 (read in readmode) is counted.
  The table is allocated with gt_malloc and is not freed here, so the
  caller has to release it.
*/
static Seqpos *leftcontextofspecialchardist(unsigned int numofchars,
                                            const Encodedsequence *encseq,
                                            Readmode readmode)
{
  Seqpos *dist, totallength = getencseqtotallength(encseq);
  unsigned int code;
  GtUchar neighbour;

  dist = gt_malloc(sizeof (*dist) * numofchars);
  code = 0;
  while (code < numofchars)
  {
    dist[code++] = 0;
  }
  if (hasspecialranges(encseq))
  {
    Sequencerange range;
    Specialrangeiterator *sri;

    /* NOTE(review): the meaning of the second argument (true) is not
       visible from here; presumably it selects the iteration direction -
       confirm against the iterator's declaration. */
    sri = newspecialrangeiterator(encseq,true);
    if (ISDIRREVERSE(readmode))
    {
      /* the neighbour is fetched in the corresponding non-reversed mode */
      Readmode unreversed;

      if (readmode == Reversemode)
      {
        unreversed = Forwardmode;
      } else
      {
        unreversed = Complementmode;
      }
      while (nextspecialrangeiterator(&range,sri))
      {
        if (range.rightpos < totallength)
        {
          neighbour = getencodedchar(encseq,range.rightpos,unreversed);
          if (ISNOTSPECIAL(neighbour))
          {
            dist[neighbour]++;
          }
        }
      }
    } else
    {
      while (nextspecialrangeiterator(&range,sri))
      {
        gt_assert(range.leftpos < totallength);
        if (range.leftpos > 0)
        {
          neighbour = getencodedchar(encseq,range.leftpos-1,readmode);
          if (ISNOTSPECIAL(neighbour))
          {
            dist[neighbour]++;
          }
        }
      }
    }
    freespecialrangeiterator(&sri);
  }
  if (getencseqlengthofspecialsuffix(encseq) == 0)
  {
    /* no special suffix: the last position holds a regular character */
    neighbour = getencodedchar(encseq,totallength-1,readmode);
    gt_assert(ISNOTSPECIAL(neighbour));
    dist[neighbour]++;
  }
  return dist;
}
Beispiel #2
0
/*
  Consistency check of the two access methods of encseq: starting at
  startpos, every position up to the total length is read once by random
  access (getencodedchar) and once via the scan state esr
  (sequentialgetencodedchar), both in readmode. On the first position
  where the two results differ a diagnostic is written to stderr and the
  program exits with GT_EXIT_PROGRAMMING_ERROR. esr is (re)initialized
  here for the given readmode and start position.
*/
static void runscanatpostrial(const Encodedsequence *encseq,
                              Encodedsequencescanstate *esr,
                              Readmode readmode,Seqpos startpos)
{
  Seqpos totallength, current;
  GtUchar byrandomaccess, byscan;

  totallength = getencseqtotallength(encseq);
  initEncodedsequencescanstate(esr,encseq,readmode,startpos);
  current = startpos;
  while (current < totallength)
  {
    byrandomaccess = getencodedchar(encseq,current,readmode);
    byscan = sequentialgetencodedchar(encseq,esr,current,readmode);
    if (byrandomaccess == byscan)
    {
      current++;
      continue;
    }
    /* mismatch: random access is treated as the reference value */
    fprintf(stderr,"startpos = " FormatSeqpos
                   " access=%s, mode=%s: position=" FormatSeqpos
                   ": random access (correct) = %u != %u = "
                   " sequential read (wrong)\n",
                   PRINTSeqposcast(startpos),
                   encseqaccessname(encseq),
                   showreadmode(readmode),
                   PRINTSeqposcast(current),
                   (unsigned int) byrandomaccess,
                   (unsigned int) byscan);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }
}
Beispiel #3
0
/*
  Walk from idx upwards to (but excluding) targetptr[source]. For each
  stored start position p > 0 the character cc at p-1 is fetched; if cc
  is not special and superbucket cc is not yet marked as sorted, p-1 is
  written to the slot targetptr[cc] points to and that pointer is
  advanced by one.
  The loop bound targetptr[source] is deliberately re-read in every
  iteration, since the body may advance entries of targetptr.
*/
static void forwardderive(const GtBucketspec2 *bucketspec2,
                          Seqpos **targetptr,
                          unsigned int source,
                          Seqpos *idx)
{
  Seqpos suffixstart;
  GtUchar leftchar;

  gt_assert (idx < targetptr[source]);
  while (idx < targetptr[source])
  {
    suffixstart = *idx;
    if (suffixstart > 0)
    {
      leftchar = getencodedchar(bucketspec2->encseq,
                                suffixstart-1,
                                bucketspec2->readmode);
      if (ISNOTSPECIAL(leftchar) &&
          !bucketspec2->superbuckettab[leftchar].sorted)
      {
        /* store the predecessor position, then move the slot forward */
        *(targetptr[leftchar]++) = suffixstart - 1;
      }
    }
    idx++;
  }
}
Beispiel #4
0
/*
  Initialize eqsvector from the pattern useq of length ulen over an
  alphabet of size alphasize, then feed the vlen characters of encseq
  starting at vstartpos (read in Forwardmode) one by one into
  COMPUTENEWDIST. The value of distval after the last character is
  returned. The working variables, including distval, are supplied by the
  DECLARELOCALVARS macro, which is defined outside this block.
*/
unsigned long distanceofshortstringsencseq(unsigned long *eqsvector,
                                           unsigned int alphasize,
                                           const GtUchar *useq,
                                           unsigned long ulen,
                                           const Encodedsequence *encseq,
                                           Seqpos vstartpos,
                                           Seqpos vlen)
{
  DECLARELOCALVARS;
  GtUchar cc;
  Seqpos pos, vendpos;

  initeqsvector(eqsvector,(unsigned long) alphasize,useq,ulen);
  vendpos = vstartpos + vlen; /* first position behind the substring */
  pos = vstartpos;
  while (pos < vendpos)
  {
    cc = getencodedchar(encseq,pos,Forwardmode);
    COMPUTENEWDIST(cc);
    pos++;
  }
  return distval;
}
Beispiel #5
0
/*
  Walk from idx downwards to (but excluding) targetptr[source]. For each
  stored start position p > 0 the character cc at p-1 is fetched; if cc
  is not special and superbucket cc is not yet marked as sorted, p-1 is
  written to the slot targetptr[cc] points to and that pointer is moved
  back by one.
  The loop bound targetptr[source] is deliberately re-read in every
  iteration, since the body may move entries of targetptr.
*/
static void backwardderive(const GtBucketspec2 *bucketspec2,
                           Seqpos **targetptr,
                           unsigned int source,
                           Seqpos *idx)
{
  Seqpos suffixstart;
  GtUchar leftchar;

  gt_assert (idx > targetptr[source]);
  while (idx > targetptr[source])
  {
    suffixstart = *idx;
    if (suffixstart > 0)
    {
      leftchar = getencodedchar(bucketspec2->encseq,
                                suffixstart-1,
                                bucketspec2->readmode);
      if (ISNOTSPECIAL(leftchar) &&
          !bucketspec2->superbuckettab[leftchar].sorted)
      {
        /* store the predecessor position, then move the slot backward */
        *(targetptr[leftchar]--) = suffixstart - 1;
      }
    }
    idx--;
  }
}
Beispiel #6
0
/*
  Read encseq in Forwardmode starting at startpos and feed each character
  into COMPUTENEWDIST until either distval <= maxdistance holds or the
  last position of the sequence (totallength-1) has been processed.
  In that case the result is defined and holds the number of characters
  consumed (pos - startpos + 1).
  If nowildcards is set and a WILDCARD character is read first, the
  result is undefined with value 0.
  eqsvector is initialized here from useq/ulen/alphasize; the working
  variables, including distval, come from the DECLARELOCALVARS macro
  defined outside this block. maxdistance must be positive.
*/
Definedunsignedlong forwardprefixmatch(const Encodedsequence *encseq,
                                       unsigned int alphasize,
                                       Seqpos startpos,
                                       bool nowildcards,
                                       unsigned long *eqsvector,
                                       const GtUchar *useq,
                                       unsigned long ulen,
                                       unsigned long maxdistance)
{
  DECLARELOCALVARS;
  Seqpos pos, totallength = getencseqtotallength(encseq);
  GtUchar cc;
  Definedunsignedlong result;

  initeqsvector(eqsvector,(unsigned long) alphasize,useq,ulen);
  gt_assert(maxdistance > 0);
  pos = startpos;
  for (;;)
  {
    gt_assert(pos - startpos <= (Seqpos) (ulen + maxdistance));
    cc = getencodedchar(encseq,pos,Forwardmode);
    if (nowildcards && cc == (GtUchar) WILDCARD)
    {
      /* wildcards are not permitted: report "no match" */
      result.defined = false;
      result.valueunsignedlong = 0;
      return result;
    }
    COMPUTENEWDIST(cc);
    if (distval <= maxdistance || pos == totallength-1)
    {
      /* close enough, or the end of the sequence was reached */
      result.defined = true;
      result.valueunsignedlong = (unsigned long) (pos - startpos + 1);
      return result;
    }
    pos++;
  }
}
Beispiel #7
0
/*
  Check that the different ways of reading encseq agree on every
  position. For pos = 0, 1, ... the character is read by random access
  (getencodedchar) and by a sequential scan (sequentialgetencodedchar),
  both in readmode. If filenametab is not NULL and readmode is
  Forwardmode, the character is additionally read from a sequence buffer
  created from filenametab, and the loop ends when that buffer is
  exhausted; otherwise it ends at the total length of encseq.
  After a scan without mismatch, the number of positions visited must be
  equal to the total length.

  Returns 0 on success and -1 on failure; in the latter case err is set
  (by this function or by the failing sequence buffer call).

  NOTE(review): in the buffer-driven case the loop does not bound pos by
  totallength before the random access - presumably the input files are
  expected to match encseq; confirm against the callers.
*/
static int testfullscan(const GtStrArray *filenametab,
                        const Encodedsequence *encseq,
                        Readmode readmode,
                        GtError *err)
{
  Seqpos pos, totallength;
  GtUchar ccscan = 0, ccra, ccsr;
  GtSequenceBuffer *fb = NULL;
  int retval;
  bool haserr = false;
  /* NULL so that the cleanup below is well defined if the scan state is
     never created (sequence buffer creation failed) */
  Encodedsequencescanstate *esr = NULL;
  unsigned long long fullscanpbar = 0;

  gt_error_check(err);
  totallength = getencseqtotallength(encseq);
  gt_progressbar_start(&fullscanpbar,(unsigned long long) totallength);
  if (filenametab != NULL)
  {
    fb = gt_sequence_buffer_new_guess_type((GtStrArray*) filenametab, err);
    if (!fb)
      haserr = true;
    if (!haserr)
      gt_sequence_buffer_set_symbolmap(fb, getencseqAlphabetsymbolmap(encseq));
  }
  if (!haserr) {
    esr = newEncodedsequencescanstate();
    initEncodedsequencescanstate(esr,encseq,readmode,0);
    for (pos=0; /* Nothing */; pos++)
    {
      if (filenametab != NULL && readmode == Forwardmode)
      {
        /* the sequence buffer determines the end of the scan */
        retval = gt_sequence_buffer_next(fb,&ccscan,err);
        if (retval < 0)
        {
          haserr = true;
          break;
        }
        if (retval == 0)
        {
          break;
        }
      } else
      {
        if (pos >= totallength)
        {
          break;
        }
      }
      ccra = getencodedchar(encseq,pos,readmode); /* Random access */
      if (filenametab != NULL && readmode == Forwardmode)
      {
        if (ccscan != ccra)
        {
          gt_error_set(err,"access=%s, position=" FormatSeqpos
                            ": scan (readnextchar) = %u != "
                            "%u = random access",
                            encseqaccessname(encseq),
                            PRINTSeqposcast(pos),
                            (unsigned int) ccscan,
                            (unsigned int) ccra);
          haserr = true;
          break;
        }
      }
      ccsr = sequentialgetencodedchar(encseq,esr,pos,readmode);
      if (ccra != ccsr)
      {
        gt_error_set(err,"access=%s, mode=%s: position=" FormatSeqpos
                          ": random access = %u != %u = sequential read",
                          encseqaccessname(encseq),
                          showreadmode(readmode),
                          PRINTSeqposcast(pos),
                          (unsigned int) ccra,
                          (unsigned int) ccsr);
        haserr = true;
        break;
      }
      fullscanpbar++;
    }
  }
  /* The progress bar was started unconditionally and refers to the local
     counter fullscanpbar, so it is stopped on every path before this
     function returns. */
  gt_progressbar_stop();
  if (!haserr)
  {
    if (pos != totallength)
    {
      gt_error_set(err,"sequence length must be " FormatSeqpos " but is "
                         FormatSeqpos,
                         PRINTSeqposcast(totallength),
                         PRINTSeqposcast(pos));
      haserr = true;
    }
  }
  if (esr != NULL)
  {
    freeEncodedsequencescanstate(&esr);
  }
  gt_sequence_buffer_delete(fb);
  return haserr ? -1 : 0;
}
Beispiel #8
0
/*
  Print one line of detailed output for the LTR element described by
  boundaries to stdout. All fields are separated by two blanks:
    - start, end and length of the whole element,
    - start, end and length of the left LTR,
    - the left TSD and its length      (only if lo->minlengthTSD > 1),
    - the first two and last two characters of the left LTR
                              (only if lo->motif.allowedmismatches < 4),
    - start, end and length of the right LTR,
    - the right TSD and its length     (only if lo->minlengthTSD > 1),
    - the first two and last two characters of the right LTR
                              (only if lo->motif.allowedmismatches < 4),
    - the similarity and the contig number.
  Start and end positions are printed relative to offset and incremented
  by 1. Sequence characters are read from encseq in Forwardmode and
  translated with the alphabet's character table.
*/
static void producelongutput(const LTRharvestoptions *lo,
                             const LTRboundaries *boundaries,
                             const Encodedsequence *encseq,
                             Seqpos offset)
{
  const GtUchar *characters = getencseqAlphabetcharacters(encseq);
  GtUchar code;

  /* whole element */
  printf(FormatSeqpos "  ",
         PRINTSeqposcast(boundaries->leftLTR_5 -offset + 1));
  printf(FormatSeqpos "  ",
         PRINTSeqposcast(boundaries->rightLTR_3 -offset  + 1));
  printf(FormatSeqpos "  ",
         PRINTSeqposcast((boundaries->rightLTR_3 - boundaries->leftLTR_5
                          + 1)));
  /* left LTR */
  printf(FormatSeqpos "  ",
         PRINTSeqposcast(boundaries->leftLTR_5 -offset  + 1));
  printf(FormatSeqpos "  ",
         PRINTSeqposcast(boundaries->leftLTR_3 -offset  + 1));
  printf(FormatSeqpos "  ",
         PRINTSeqposcast((boundaries->leftLTR_3 - boundaries->leftLTR_5
                          + 1)));
  if (lo->minlengthTSD > 1U)
  {
    /* left TSD: the lenleftTSD characters directly preceding leftLTR_5 */
    Seqpos k = 0;

    while (k < boundaries->lenleftTSD)
    {
      code = getencodedchar(encseq,
                            boundaries->leftLTR_5 -
                            boundaries->lenleftTSD + k,
                            Forwardmode);
      printf("%c",(char) characters[code]);
      k++;
    }
    printf("  " FormatSeqpos "  ",
           PRINTSeqposcast(boundaries->lenleftTSD));
  }
  if (lo->motif.allowedmismatches < 4U)
  {
    char first, second, penultimate, last;

    code = getencodedchar(encseq,boundaries->leftLTR_5,Forwardmode);
    first = (char) characters[code];
    code = getencodedchar(encseq,boundaries->leftLTR_5+1,Forwardmode);
    second = (char) characters[code];
    code = getencodedchar(encseq,boundaries->leftLTR_3-1,Forwardmode);
    penultimate = (char) characters[code];
    code = getencodedchar(encseq,boundaries->leftLTR_3,Forwardmode);
    last = (char) characters[code];
    printf("%c%c..%c%c  ",first,second,penultimate,last);
  }
  /* right LTR */
  printf(FormatSeqpos "  ",
         PRINTSeqposcast(boundaries->rightLTR_5 -offset + 1));
  printf(FormatSeqpos "  ",
         PRINTSeqposcast(boundaries->rightLTR_3 -offset + 1));
  printf(FormatSeqpos "  ",
         PRINTSeqposcast(boundaries->rightLTR_3
                         - boundaries->rightLTR_5 + 1));
  if (lo->minlengthTSD > 1U)
  {
    /* right TSD: the lenrightTSD characters directly following rightLTR_3 */
    Seqpos k = 0;

    while (k < boundaries->lenrightTSD)
    {
      code = getencodedchar(encseq,
                            boundaries->rightLTR_3+k+1,
                            Forwardmode);
      printf("%c", (char) characters[code]);
      k++;
    }
    printf("  " FormatSeqpos "  ",PRINTSeqposcast(boundaries->lenrightTSD));
  }
  if (lo->motif.allowedmismatches < 4U)
  {
    char first, second, penultimate, last;

    code = getencodedchar(encseq,boundaries->rightLTR_5,Forwardmode);
    first = (char) characters[code];
    code = getencodedchar(encseq,boundaries->rightLTR_5+1,Forwardmode);
    second = (char) characters[code];
    code = getencodedchar(encseq,boundaries->rightLTR_3-1,Forwardmode);
    penultimate = (char) characters[code];
    code = getencodedchar(encseq,boundaries->rightLTR_3,Forwardmode);
    last = (char) characters[code];
    printf("%c%c..%c%c",first,second,penultimate,last);
  }
  /* similarity of the two LTRs */
  printf("  %.2f", boundaries->similarity);
  /* number of the contig the element belongs to */
  printf("  %lu\n", boundaries->contignumber);
}