Beispiel #1
0
/*
  Walk over all pairs of neighbouring entries in <sortedsample> (indices
  idx-1 and idx for 1 <= idx < effectivesamplesize), determine the lcp of the
  two suffixes via gt_encseq_check_comparetwosuffixes() and compare it with
  the value stored at index idx in the lcp table of <outlcpinfosample>.

  If <checkequality> is true, the stored value must equal the computed value;
  otherwise the stored value must not exceed it. On a violation the offending
  pair is reported on stderr and the program terminates with
  GT_EXIT_PROGRAMMING_ERROR. Nothing is done for an empty sample.
*/
void gt_Outlcpinfo_check_lcpvalues(const GtEncseq *encseq,
                                   GtReadmode readmode,
                                   const GtSuffixsortspace *sortedsample,
                                   GtUword effectivesamplesize,
                                   const GtOutlcpinfo *outlcpinfosample,
                                   bool checkequality)
{
  GT_UNUSED int order;
  GtUword rank, truelcp, prevsuffix, thissuffix, storedlcp,
          missingtotal = 0;
  bool mismatch;

  if (effectivesamplesize == 0)
  {
    return;
  }
  prevsuffix = gt_suffixsortspace_getdirect(sortedsample,0);
  rank = 1UL;
  while (rank < effectivesamplesize)
  {
    thissuffix = gt_suffixsortspace_getdirect(sortedsample,rank);
    order = gt_encseq_check_comparetwosuffixes(encseq,
                                               readmode,
                                               &truelcp,
                                               false,
                                               false,
                                               0,
                                               prevsuffix,
                                               thissuffix,
                                               NULL,
                                               NULL);
    /* the comparison result for a neighbouring pair must not be positive */
    gt_assert(order <= 0);
    /* an lcp value must have been recorded for this index */
    gt_assert(GT_ISIBITSET(outlcpinfosample->lcpsubtab.tableoflcpvalues
                                                      .isset,rank));
    storedlcp = (GtUword) outlcpinfosample->lcpsubtab.tableoflcpvalues.
                                 bucketoflcpvalues[rank];
    mismatch = checkequality ? (storedlcp != truelcp)
                             : (storedlcp > truelcp);
    if (mismatch)
    {
      fprintf(stderr,"idx="GT_WU",suffixpair="GT_WU","GT_WU": "
                     "currentlcp = "GT_WU" %s "GT_WU" = reallcp\n",
                      rank,prevsuffix,thissuffix,storedlcp,
                      checkequality ? "!=" : ">",truelcp);
      gt_encseq_showatstartposwithdepth(stderr,encseq,readmode,prevsuffix,
                                        50UL);
      fprintf(stderr,"\n");
      gt_encseq_showatstartposwithdepth(stderr,encseq,readmode,thissuffix,
                                        50UL);
      fprintf(stderr,"\n");
      exit(GT_EXIT_PROGRAMMING_ERROR);
    }
    /* sum of the differences between computed and stored lcp values; only
       needed by a diagnostic output which is currently disabled */
    missingtotal += (truelcp - storedlcp);
    prevsuffix = thissuffix;
    rank++;
  }
}
Beispiel #2
0
/*
  Scan <suffixsortspace> downwards starting at index <idx> for as long as
  idx is larger than targetoffset[source]. For every visited suffix with
  start position > 0, the character directly to its left is fetched. If this
  character is not special and its superbucket is not yet marked as sorted,
  the start position of the left-extended suffix is stored at index
  targetoffset[cc] and that offset is decremented.

  Both sides of the loop condition are incremented by one, so the comparison
  also behaves as intended when targetoffset[source] has wrapped around below
  zero. The condition is re-evaluated in every iteration because the loop
  body may modify targetoffset[source] itself.
*/
static void backwardderive(const GtBucketspec2 *bucketspec2,
                           GtSuffixsortspace *suffixsortspace,
                           GtUword *targetoffset,
                           unsigned int source,
                           GtUword idx)
{
  while (idx + 1 > targetoffset[source] + 1)
  {
    const GtUword suffixstart
      = gt_suffixsortspace_getdirect(suffixsortspace,idx);

    if (suffixstart > 0)
    {
      const GtUchar leftchar
        = gt_encseq_get_encoded_char(bucketspec2->encseq,
                                     suffixstart - 1,
                                     bucketspec2->readmode);

      if (ISNOTSPECIAL(leftchar) &&
          !bucketspec2->superbuckettab[leftchar].sorted)
      {
        gt_suffixsortspace_setdirect(suffixsortspace,
                                     targetoffset[leftchar],
                                     suffixstart - 1);
        targetoffset[leftchar]--;
      }
    }
    idx--;
  }
}
/*
  Return the entry of <sssp> addressed relative to the current bucket: the
  direct index is bucketleftidx + subbucketleft + idx, reduced by partoffset.
*/
unsigned long gt_suffixsortspace_get(const GtSuffixsortspace *sssp,
                                     unsigned long subbucketleft,
                                     unsigned long idx)
{
  const unsigned long directidx = sssp->bucketleftidx
                                    + subbucketleft
                                    + idx
                                    - sssp->partoffset;

  return gt_suffixsortspace_getdirect(sssp, directidx);
}
Beispiel #4
0
/*
  Return the entry of <sssp> addressed relative to the current bucket: the
  direct index is bucketleftidx + subbucketleft + idx, reduced by partoffset.
*/
GtUword gt_suffixsortspace_get(const GtSuffixsortspace *sssp,
                                     GtUword subbucketleft,
                                     GtUword idx)
{
  const GtUword directidx = sssp->bucketleftidx
                              + subbucketleft
                              + idx
                              - sssp->partoffset;

  return gt_suffixsortspace_getdirect(sssp, directidx);
}
void gt_suffixsortspace_checkorder(const GtSuffixsortspace *sssp,
                                   unsigned long subbucketleft,
                                   unsigned long width)
{
  unsigned long idx, currentpos;
  GT_UNUSED unsigned long prevpos;

  gt_assert(width > 0);
  prevpos = gt_suffixsortspace_getdirect(sssp,
                                         sssp->bucketleftidx+subbucketleft -
                                         sssp->partoffset);
  for (idx=sssp->bucketleftidx+subbucketleft - sssp->partoffset + 1;
       idx<sssp->bucketleftidx+subbucketleft + width - sssp->partoffset;
       idx++)
  {
    currentpos = gt_suffixsortspace_getdirect(sssp,idx);
    gt_assert(prevpos > currentpos);
    prevpos = currentpos;
  }
}
/*
  Print to stdout the first and last direct index of the range of <width>
  entries beginning at bucketleftidx + subbucketleft - partoffset, followed
  by '=' and the space-separated entries of <sssp> in that range. No newline
  is written.
*/
void gt_suffixsortspace_showrange(const GtSuffixsortspace *sssp,
                                  unsigned long subbucketleft,
                                  unsigned long width)
{
  const unsigned long first = sssp->bucketleftidx + subbucketleft
                                - sssp->partoffset,
                      end = sssp->bucketleftidx + subbucketleft + width
                              - sssp->partoffset;
  unsigned long pos = first;

  printf("%lu,%lu=",first,end - 1);
  while (pos < end)
  {
    printf(" %lu", gt_suffixsortspace_getdirect(sssp,pos));
    pos++;
  }
}
Beispiel #7
0
/*
  Print to stdout the first and last direct index of the range of <width>
  entries beginning at bucketleftidx + subbucketleft - partoffset, followed
  by '=' and the space-separated entries of <sssp> in that range. No newline
  is written.
*/
void gt_suffixsortspace_showrange(const GtSuffixsortspace *sssp,
                                  GtUword subbucketleft,
                                  GtUword width)
{
  const GtUword first = sssp->bucketleftidx + subbucketleft
                          - sssp->partoffset,
                end = sssp->bucketleftidx + subbucketleft + width
                        - sssp->partoffset;
  GtUword pos = first;

  printf(""GT_WU","GT_WU"=",first,end - 1);
  while (pos < end)
  {
    printf(" "GT_WU"", gt_suffixsortspace_getdirect(sssp,pos));
    pos++;
  }
}
Beispiel #8
0
/*
  Fill the first <len> elements of the GtUchar buffer <voiddest>: element k
  receives the result of sfxIdx2BWTSym() applied to the entry of
  <suffixsortspace> at direct index offset + k, using the encoded sequence
  and readmode stored in <translator> (a struct encSeqTrState).
  Returns the number of bytes written, i.e. len * sizeof (GtUchar).
*/
size_t gt_translateSuftab2BWTSuffixsortspace(
                                       void *translator,
                                       void *voiddest,
                                       const GtSuffixsortspace *suffixsortspace,
                                       unsigned long offset,
                                       size_t len)
{
  struct encSeqTrState *state = (struct encSeqTrState *) translator;
  GtUchar *out = (GtUchar *) voiddest;
  size_t written = 0;

  gt_assert(state);
  while (written < len)
  {
    out[written]
      = sfxIdx2BWTSym(gt_suffixsortspace_getdirect(suffixsortspace,
                                                   offset + written),
                      state->encseq, state->readmode);
    written++;
  }
  return len * sizeof (*out);
}
Beispiel #9
0
/*
  Tool runner: load the encoded sequence named by argv[parsed_args] and emit
  its sequences via gt_seqorder_output() in the order selected by the tool
  arguments:
    - invert:  sequence numbers from last to first,
    - shuffle: order delivered by gt_seqorder_get_shuffled_seqnums(),
    - sort / revsort: order established by gt_seqorder_sort(), traversed
      forwards (sort) or backwards (revsort).
  A warning is issued if the encoded sequence has no description support.
  Returns 0 on success and -1 if the encoded sequence could not be loaded
  (the error is then reported via <err> by the loader).
*/
static int gt_seqorder_runner(GT_UNUSED int argc, const char **argv,
    int parsed_args, void *tool_arguments, GtError *err)
{
  GtSeqorderArguments *arguments = tool_arguments;
  const char *indexname;
  int had_err = 0;
  GtEncseq *encseq;
  GtEncseqLoader *loader;
  unsigned long seqnum, nofseqs;

  gt_error_check(err);
  gt_assert(arguments != NULL);

  /* load encseq */
  indexname = argv[parsed_args];
  loader = gt_encseq_loader_new();
  encseq = gt_encseq_loader_load(loader, indexname, err);
  if (encseq == NULL)
  {
    had_err = -1;
  }
  else
  {
    if (!gt_encseq_has_description_support(encseq))
    {
      gt_warning("%s has no description support", indexname);
    }
    nofseqs = gt_encseq_num_of_sequences(encseq);
    if (arguments->invert)
    {
      /* count down without letting the unsigned counter drop below zero */
      seqnum = nofseqs;
      while (seqnum > 0)
      {
        seqnum--;
        gt_seqorder_output(seqnum, encseq);
      }
    }
    else if (arguments->shuffle)
    {
      unsigned long *shuffled = gt_malloc(sizeof (*shuffled) * nofseqs);

      gt_seqorder_get_shuffled_seqnums(nofseqs, shuffled);
      for (seqnum = 0; seqnum < nofseqs; seqnum++)
      {
        gt_seqorder_output(shuffled[seqnum], encseq);
      }
      gt_free(shuffled);
    }
    else
    {
      GtSuffixsortspace *sorted;

      gt_assert(arguments->sort || arguments->revsort);
      /* the start position of the last sequence is handed over as second
         argument; remark carried over from the original code: use iterator
         over sequence separators: saves a lot of binary searches */
      sorted = gt_suffixsortspace_new(nofseqs,
                                      gt_encseq_seqstartpos(encseq,
                                                            nofseqs-1),
                                      false,NULL);
      gt_seqorder_sort(sorted, encseq);
      if (arguments->sort)
      {
        for (seqnum = 0; seqnum < nofseqs; seqnum++)
        {
          gt_seqorder_output(gt_encseq_seqnum(encseq,
                gt_suffixsortspace_getdirect(sorted, seqnum)), encseq);
        }
      }
      else
      {
        seqnum = nofseqs;
        while (seqnum > 0)
        {
          seqnum--;
          gt_seqorder_output(gt_encseq_seqnum(encseq,
                gt_suffixsortspace_getdirect(sorted, seqnum)), encseq);
        }
      }
      gt_suffixsortspace_delete(sorted, false);
    }
  }

  gt_encseq_loader_delete(loader);
  gt_encseq_delete(encseq);
  return had_err;
}
Beispiel #10
0
/*
  Process all characters in the order given by bucketspec2->order. For each
  such character <source>:
    1. every subbucket (source,second) with second != source which is not yet
       flagged as sorted is expected to have been handled by the hard-work
       step (asserted via hardworktodo); its width is added to the hardwork
       counter and it is flagged as sorted,
    2. forwardderive() and backwardderive() are invoked, each with a freshly
       initialised targetoffset table and only if the respective index range
       of the superbucket of <source> is non-empty,
    3. all subbuckets (idx,source) and the superbucket of <source> are flagged
       as sorted.
  Finally, the accumulated hardwork value and its ratio to the total length
  of the encoded sequence are written to <logger>.

  Although <bucketspec2> is a pointer to const, the sorted flags in the
  tables it refers to are modified.
*/
void gt_copysort_derivesorting(const GtBucketspec2 *bucketspec2,
                               GtSuffixsortspace *suffixsortspace,
                               GtLogger *logger)
{
  GtUword hardwork = 0,
                *targetoffset;
  unsigned int idx, idxsource, source, second;

  /* Optional debugging output of the suffixes in suffixsortspace.
     NOTE(review): this section uses `readmode` and `encseq`, which are not
     declared in this function; unless they are provided elsewhere, it would
     presumably not compile with WITHSUFFIXES defined - confirm. */
#ifdef WITHSUFFIXES
  {
    GtUword idx;
    for (idx = 0; idx < bucketspec2->partwidth; idx++)
    {
      gt_encseq_showatstartpos(
                            stdout,
                            GT_ISDIRREVERSE(readmode) ? false : true,
                            GT_ISDIRCOMPLEMENT(readmode) ? true : false,
                            encseq,
                            gt_suffixsortspace_getdirect(suffixsortspace,idx));
    }
  }
#endif
  /* one target offset per character; refilled before each derive call */
  targetoffset = gt_malloc(sizeof (*targetoffset) * bucketspec2->numofchars);
  for (idxsource = 0; idxsource<bucketspec2->numofchars; idxsource++)
  {
    source = bucketspec2->order[idxsource];
    /* account for the subbuckets of source which required explicit sorting */
    for (second = 0; second < bucketspec2->numofchars; second++)
    {
      if (!bucketspec2->subbuckettab[source][second].sorted && source != second)
      {
        gt_assert(bucketspec2->subbuckettab[source][second].hardworktodo);
        gt_logger_log(logger,"hard work for %u %u",source,second);
        hardwork += getendidx(bucketspec2,source,second) -
                    getstartidx(bucketspec2,source,second);
        bucketspec2->subbuckettab[source][second].sorted = true;
      } else
      {
        gt_assert(!bucketspec2->subbuckettab[source][second].hardworktodo);
      }
    }
    /* forward pass: only if the start index of subbucket (source,0) lies
       before the start index of subbucket (source,source) */
    if (getstartidx(bucketspec2,source,0) <
        getstartidx(bucketspec2,source,source))
    {
      /* targets are the start indexes of the subbuckets (idx,source) */
      for (idx = 0; idx < bucketspec2->numofchars; idx++)
      {
        targetoffset[idx] = getstartidx(bucketspec2,idx,source);
      }
      forwardderive(bucketspec2,
                    suffixsortspace,
                    targetoffset,
                    source,
                    getstartidx(bucketspec2,source,0));
    }
    /* backward pass: only if the end index of subbucket (source,source) lies
       before the end index obtained for (source,numofchars) */
    if (getendidx(bucketspec2,source,source) <
        getendidx(bucketspec2,source,bucketspec2->numofchars))
    {
      for (idx = 0; idx < bucketspec2->numofchars; idx++)
      {
        /* do not need to assert that getendidx(idx,source)  > 0, as later the
           value stored in targetoffset is incremented */
        targetoffset[idx] = getendidx(bucketspec2,idx,source) - 1;
      }
      gt_assert(getendidx(bucketspec2,source,bucketspec2->numofchars) > 0);
      backwardderive(bucketspec2,
                     suffixsortspace,
                     targetoffset,
                     source,
                     getendidx(bucketspec2,source,bucketspec2->numofchars) - 1);
    }
    /* everything ending up in a bucket whose second character is source is
       flagged as sorted from now on, as is the superbucket of source */
    for (idx = 0; idx < bucketspec2->numofchars; idx++)
    {
      bucketspec2->subbuckettab[idx][source].sorted = true;
    }
    bucketspec2->superbuckettab[source].sorted = true;
  }
  gt_free(targetoffset);
  /* report the hardwork total, absolute and relative to the total length */
  gt_logger_log(logger,"hardwork = "GT_WU" (%.2f)",
                hardwork,
                (double) hardwork/gt_encseq_total_length(bucketspec2->encseq));
}