/*
  Build a table with one Rankedbounds entry per special range of <encseq>.

  The ranges are enumerated with a GtSpecialrangeiterator; the iterator is
  asked to move forward unless <readmode> is a reverse direction. For every
  range the entry stores its start (lowerbound), its end (upperbound) and
  a rank, which is the sum of (upperbound - lowerbound) over all ranges
  delivered before it.

  Returns NULL if <encseq> has no special ranges. Otherwise returns an array
  of gt_encseq_realspecialranges(encseq) entries obtained from gt_malloc();
  it is not released here, so the caller presumably has to free it.
*/
Rankedbounds *gt_fillrankbounds(const GtEncseq *encseq,
                                GtReadmode readmode)
{
  GtSpecialrangeiterator *rangeiter;
  GtRange specialrange;
  GtUword numofranges, rankoffset = 0, filled = 0;
  Rankedbounds *table;
  bool forward;

  if (!gt_encseq_has_specialranges(encseq))
  {
    return NULL;
  }
  numofranges = gt_encseq_realspecialranges(encseq);
  table = gt_malloc(sizeof (Rankedbounds) * numofranges);
  forward = GT_ISDIRREVERSE(readmode) ? false : true;
  rangeiter = gt_specialrangeiterator_new(encseq, forward);
  while (gt_specialrangeiterator_next(rangeiter, &specialrange))
  {
    Rankedbounds *entry = table + filled;

    entry->lowerbound = specialrange.start;
    entry->upperbound = specialrange.end;
    /* rank of this range = total length of all ranges seen so far */
    entry->rank = rankoffset;
    rankoffset += entry->upperbound - entry->lowerbound;
    filled++;
  }
  /* the iterator must deliver exactly as many ranges as were allocated */
  gt_assert(filled == numofranges);
  gt_specialrangeiterator_delete(rangeiter);
  return table;
}
Example #2
0
/*
  Advance the enumeration <ecp> by one step.

  While the special range iterator stored in <ecp> is still alive, the next
  special range is fetched and the stretch between it and the previously
  seen range is handed to newcodelistelem(): (previous end, current start)
  when moving forward, (current end, previous start) otherwise. The
  previously seen range is updated after every fetched range. As soon as
  newcodelistelem() reports success the function returns true.

  When the iterator runs dry it is deleted, <ecp> is marked exhausted and
  one final stretch is tried: from the end of the last range to
  ecp->totallength when moving forward, from 0 to the start of the last
  range otherwise. The result of that final newcodelistelem() call is
  returned. Every later call returns false immediately.
*/
bool gt_Enumcodeatposition_next(Specialcontext *specialcontext,
                                Enumcodeatposition *ecp)
{
  if (ecp->exhausted)
  {
    return false;
  }
  while (ecp->sri != NULL)
  {
    GtRange nextrange;
    bool found;

    if (!gt_specialrangeiterator_next(ecp->sri, &nextrange))
    {
      /* no more special ranges: release the iterator, handle the tail */
      gt_specialrangeiterator_delete(ecp->sri);
      ecp->sri = NULL;
      break;
    }
    if (ecp->moveforward)
    {
      found = newcodelistelem(specialcontext,
                              ecp->previousrange.end,
                              nextrange.start,
                              ecp);
    } else
    {
      found = newcodelistelem(specialcontext,
                              nextrange.end,
                              ecp->previousrange.start,
                              ecp);
    }
    /* remember the range only after newcodelistelem() has seen the old one */
    ecp->previousrange = nextrange;
    if (found)
    {
      return true;
    }
  }
  ecp->exhausted = true;
  if (ecp->moveforward)
  {
    return newcodelistelem(specialcontext,
                           ecp->previousrange.end,
                           ecp->totallength,
                           ecp);
  }
  return newcodelistelem(specialcontext,
                         0,
                         ecp->previousrange.start,
                         ecp);
}
/*
  Build a sorted list with one Specialrank entry per special range of
  <encseq>.

  The ranges are enumerated forward unless <readmode> is a reverse
  direction. For each range the entry stores
    - specialrank: the summed length (end - start) of all ranges delivered
      so far, including this one, minus 1;
    - key: inversesuftab[range.end].
  The assertion only guarantees range.end <= total length, so
  <inversesuftab> presumably has to be valid up to and including the index
  equal to the total length -- confirm with the caller.

  The finished list is sorted with qsort() using compareSpecialrank (the
  ordering is defined elsewhere). Returns NULL if <encseq> has no special
  ranges; otherwise an array of gt_encseq_realspecialranges(encseq) entries
  obtained from gt_malloc() that is not released here.
*/
Specialrank *gt_fillspecialranklist(const GtEncseq *encseq,
                                    GtReadmode readmode,
                                    const GtUword *inversesuftab)
{
  GtSpecialrangeiterator *rangeiter;
  GtRange specialrange;
  GtUword numofranges, cumulated = 0, filled;
  GT_UNUSED GtUword totallength;
  Specialrank *ranklist;
  bool forward;

  if (!gt_encseq_has_specialranges(encseq))
  {
    return NULL;
  }
  totallength = gt_encseq_total_length(encseq);
  numofranges = gt_encseq_realspecialranges(encseq);
  ranklist = gt_malloc(sizeof (Specialrank) * numofranges);
  forward = GT_ISDIRREVERSE(readmode) ? false : true;
  rangeiter = gt_specialrangeiterator_new(encseq, forward);
  for (filled = 0;
       gt_specialrangeiterator_next(rangeiter, &specialrange);
       filled++)
  {
    gt_assert(filled < numofranges);
    gt_assert(specialrange.end <= totallength);
    cumulated += specialrange.end - specialrange.start;
    ranklist[filled].specialrank = cumulated - 1;
    ranklist[filled].key = inversesuftab[specialrange.end];
  }
  /* every allocated slot must have been filled */
  gt_assert(filled == numofranges);
  gt_specialrangeiterator_delete(rangeiter);
  qsort(ranklist, (size_t) numofranges,
        sizeof (Specialrank), compareSpecialrank);
  return ranklist;
}
Example #4
0
/*
  Tool runner: loads the encoded sequence named by argv[parsed_args],
  optionally mirrors it, and then performs up to three independent
  diagnostics selected via <tool_arguments>:

    - bitpos:        extract the two-bit encoding at that position and print
                     it together with unitsnotspecial, the position and the
                     value returned by the extraction function;
    - stoppos:       print the next two-bit-encoding stop position as seen
                     by a reader re-initialised at that position;
    - specialranges: print every special range as "start:end".

  A position that is not smaller than the total sequence length sets an
  error in <err>. Returns 0 on success and -1 (or whatever
  gt_encseq_mirror() returned) on failure. The encoded sequence and the
  loader are always released before returning.

  NOTE(review): gt_readmode_parse() is called with a NULL error object, so
  a failure to parse the read mode is not reported here; presumably the
  option parser has already validated it -- confirm.
*/
static int gt_encseq_bitextract_runner(GT_UNUSED int argc, const char **argv,
                                       GT_UNUSED int parsed_args,
                                       void *tool_arguments,
                                       GT_UNUSED GtError *err)
{
  GtEncseqBitextractArguments *arguments = tool_arguments;
  GtEncseqLoader *loader;
  GtEncseq *encseq;
  GtReadmode rm;
  bool fwd;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments);

  loader = gt_encseq_loader_new();
  encseq = gt_encseq_loader_load(loader, argv[parsed_args], err);
  if (!encseq) {
    had_err = -1;
  } else if (arguments->mirror) {
    had_err = gt_encseq_mirror(encseq, err);
  }

  if (!had_err) {
    rm = gt_readmode_parse(gt_str_get(arguments->readmode), NULL);
    fwd = GT_ISDIRREVERSE(rm) ? false : true;
  }

  /* two-bit encoding extraction at a single position */
  if (!had_err && arguments->bitpos != GT_UNDEF_ULONG) {
    if (arguments->bitpos >= gt_encseq_total_length(encseq)) {
      gt_error_set(err, "position %lu exceeds encoded sequence length of %lu",
                   arguments->bitpos, gt_encseq_total_length(encseq));
      had_err = -1;
    } else {
      char buffer[BUFSIZ];
      GtEndofTwobitencoding etbe;
      GtEncseqReader *reader;
      unsigned long ret;

      reader = gt_encseq_create_reader_with_readmode(encseq, rm,
                                                     arguments->bitpos);
      ret = gt_encseq_extract2bitencwithtwobitencodingstoppos(&etbe, reader,
                                                        encseq,
                                                        rm, arguments->bitpos);
      gt_bitsequence_tostring(buffer, etbe.tbe);
      printf("Twobitencoding   %s\n"
             "unitsnotspecial  %u\n"
             "position         %lu\n"
             "returnvalue      %lu\n",
             buffer,
             etbe.unitsnotspecial,
             arguments->bitpos,
             ret);
      gt_encseq_reader_delete(reader);
    }
  }

  /* stop position lookup */
  if (!had_err && arguments->stoppos != GT_UNDEF_ULONG) {
    if (arguments->stoppos >= gt_encseq_total_length(encseq)) {
      gt_error_set(err, "position %lu exceeds encoded sequence length of %lu",
                   arguments->stoppos, gt_encseq_total_length(encseq));
      had_err = -1;
    } else {
      /* the reader is created at position 0 and then re-initialised at the
         requested position before the stop position is queried */
      GtEncseqReader *reader
        = gt_encseq_create_reader_with_readmode(encseq, rm, 0);

      gt_encseq_reader_reinit_with_readmode(reader, encseq, rm,
                                            arguments->stoppos);
      printf("%lu: %lu\n", arguments->stoppos,
                           gt_getnexttwobitencodingstoppos(fwd, reader));
      gt_encseq_reader_delete(reader);
    }
  }

  /* dump of all special ranges in iteration order */
  if (!had_err && arguments->specialranges
        && gt_encseq_has_specialranges(encseq)) {
    GtRange srng;
    GtSpecialrangeiterator *rangeiter
      = gt_specialrangeiterator_new(encseq, fwd);

    while (gt_specialrangeiterator_next(rangeiter, &srng)) {
      printf("%lu:%lu\n", srng.start, srng.end);
    }
    gt_specialrangeiterator_delete(rangeiter);
  }

  gt_encseq_delete(encseq);
  gt_encseq_loader_delete(loader);
  return had_err;
}
Example #5
0
/*
  Count, per character code, how often a non-special character is the
  neighbour examined for a special range of <encseq>.

  For a non-reverse <readmode> the character at range.start-1 is fetched in
  <readmode> for every special range that does not start at position 0.
  For a reverse <readmode> the character at range.end is fetched for every
  special range ending before the total length, using FORWARD when
  <readmode> is GT_READMODE_REVERSE and COMPL otherwise. The special ranges
  are always iterated forward. Only characters for which ISNOTSPECIAL()
  holds are counted.

  Finally one boundary character is added: position 0 (reverse case, only
  if the sequence has no special prefix) or position totallength-1
  (non-reverse case, only if the sequence has no special suffix). That
  character is asserted to be non-special.

  Returns an array of <numofchars> counters obtained from gt_malloc(); it is
  not released here.
*/
static GtUword *leftcontextofspecialchardist(unsigned int numofchars,
                                                   const GtEncseq *encseq,
                                                   GtReadmode readmode)
{
  const GtUword totallength = gt_encseq_total_length(encseq);
  const GtReadmode convertedreadmode = (readmode == GT_READMODE_REVERSE)
                                         ? GT_READMODE_FORWARD
                                         : GT_READMODE_COMPL;
  GtUword *dist;
  GtUchar neighbour;
  unsigned int charidx;

  dist = gt_malloc(sizeof (*dist) * numofchars);
  for (charidx = 0; charidx < numofchars; charidx++)
  {
    dist[charidx] = 0;
  }
  if (gt_encseq_has_specialranges(encseq))
  {
    GtRange range;
    GtSpecialrangeiterator *rangeiter
      = gt_specialrangeiterator_new(encseq,true);

    while (gt_specialrangeiterator_next(rangeiter,&range))
    {
      if (GT_ISDIRREVERSE(readmode))
      {
        if (range.end >= totallength)
        {
          continue; /* range touches the sequence end: nothing to fetch */
        }
        neighbour = gt_encseq_get_encoded_char(encseq,range.end,
                                               convertedreadmode);
      } else
      {
        if (range.start == 0)
        {
          continue; /* range starts the sequence: nothing to fetch */
        }
        neighbour = gt_encseq_get_encoded_char(encseq,range.start-1,
                                               readmode);
      }
      if (ISNOTSPECIAL(neighbour))
      {
        dist[neighbour]++;
      }
    }
    gt_specialrangeiterator_delete(rangeiter);
  }
  /* boundary character of the whole sequence */
  if (GT_ISDIRREVERSE(readmode))
  {
    if (gt_encseq_lengthofspecialprefix(encseq) != 0)
    {
      return dist;
    }
    neighbour = gt_encseq_get_encoded_char(encseq,0,convertedreadmode);
  } else
  {
    if (gt_encseq_lengthofspecialsuffix(encseq) != 0)
    {
      return dist;
    }
    neighbour = gt_encseq_get_encoded_char(encseq,totallength-1,readmode);
  }
  gt_assert(ISNOTSPECIAL(neighbour));
  dist[neighbour]++;
  return dist;
}