Пример #1
0
/*
 * Binary search for offset in leftborder[start..end] (both bounds
 * inclusive). If a probed entry equals offset, its index is returned.
 * Otherwise the index of the last probed entry that is larger than offset
 * is returned, or end if no probed entry was larger.
 */
static GtUword gt_radixsort_findfirstlarger(const GtUword
                                                    *leftborder,
                                                  GtUword start,
                                                  GtUword end,
                                                  GtUword offset)
{
  /* search window is the half-open index range [lo, hi) */
  GtUword lo = start,
          hi = end + 1,
          candidate = end;

  while (lo < hi)
  {
    /* midpoint of the closed range [lo, hi - 1] */
    const GtUword probe = lo + GT_DIV2(hi - 1 - lo);

    gt_assert(probe >= start && probe <= end);
    if (leftborder[probe] == offset)
    {
      return probe;
    }
    if (leftborder[probe] > offset)
    {
      /* remember this larger entry and continue left of it */
      candidate = probe;
      hi = probe;
    } else
    {
      lo = probe + 1;
    }
  }
  return candidate;
}
/*
 * Binary search over suftab[left..right] (inclusive) comparing cc with the
 * character SEQUENCE() yields at suffix start + offset. Returns the last
 * probed index whose character equals cc, or ULONG_MAX if no probe matched.
 * readmode and totallength are not referenced directly here; presumably
 * the SEQUENCE macro picks them up from the enclosing scope -- TODO confirm.
 */
static GtUword lcpintervalfindlast(const GtEncseq *encseq,
                                         GtReadmode readmode,
                                         GtUword totallength,
                                         const ESASuffixptr *suftab,
                                         GtUchar cc,
                                         GtUword offset,
                                         GtUword left,
                                         GtUword right)
{
  GtUword lastmatch = ULONG_MAX;

  while (left <= right)
  {
    /* upper midpoint of [left, right] */
    const GtUword probe = left + GT_DIV2(right - left + 1);
    const GtUword pos = ESASUFFIXPTRGET(suftab,probe) + offset;
    const GtUchar probecc = SEQUENCE(encseq,pos);

    if (cc >= probecc)
    {
      if (cc == probecc)
      {
        lastmatch = probe;
      }
      left = probe + 1;
      continue;
    }
    /* cc < probecc: go left, but never step below index 0 */
    if (probe == 0)
    {
      break;
    }
    right = probe - 1;
  }
  return lastmatch;
}
Пример #3
0
/*
 * Narrows the index pair (left, right) by bisection until they are
 * adjacent and returns the final left. At each step the character that
 * SEQUENCE() yields at suffix start + offset is compared with cc: the probe
 * becomes the new right if cc is smaller, otherwise the new left.
 * readmode and totallength are not referenced directly here; presumably
 * the SEQUENCE macro picks them up from the enclosing scope -- TODO confirm.
 */
static unsigned long lcpintervalfindrightbound(const GtEncseq *encseq,
                                        GtReadmode readmode,
                                        unsigned long totallength,
                                        const ESASuffixptr *suftab,
                                        GtUchar cc,
                                        unsigned long offset,
                                        unsigned long left,
                                        unsigned long right)
{
  while (right > left+1)
  {
    const unsigned long probe = GT_DIV2(left+right);
    const unsigned long pos = ESASUFFIXPTRGET(suftab,probe) + offset;
    const GtUchar probecc = SEQUENCE(encseq,pos);

    if (cc >= probecc)
    {
      left = probe;
    } else
    {
      right = probe;
    }
  }
  return left;
}
Пример #4
0
/*
 * Searches the table [sectionstart, secend] for idx and returns the index
 * (relative to sectionstart) of the last entry whose value is <= idx.
 *
 * The bisection only has to land on some entry that is <= idx; the final
 * linear scan then advances to the last such entry. The table is assumed
 * to be sorted ascending, and the entry following the result must be
 * readable because the scan inspects found[1] -- presumably the caller
 * provides a sentinel; TODO confirm.
 *
 * The comparison in the loop is "<=" (it used to be "<", which made the
 * equality test below it unreachable). For sorted input the returned index
 * is unchanged, the exact-hit shortcut just becomes effective.
 */
static GtUword
gt_intset_16_binarysearch_sec_idx_largest_seq(GtUword *sectionstart,
                                              GtUword *secend,
                                              GtUword idx)
{
  GtUword *midptr = NULL, *found = NULL,
          *startorig = sectionstart;
  if (*sectionstart <= idx)
    found = sectionstart;
  while (sectionstart < secend) {
    midptr = sectionstart + (GtUword) GT_DIV2(secend - sectionstart);
    if (*midptr <= idx) {
      found = midptr;
      if (*midptr == idx) {
        /* exact hit: no need to bisect further */
        break;
      }
      sectionstart = midptr + 1;
    }
    else {
      secend = midptr - 1;
    }
  }
  /* fails only if even the first entry is larger than idx */
  gt_assert(found != NULL);
  /* move forward to the last entry that is still <= idx */
  while (found[1] <= idx)
    found++;
  return (GtUword) (found - startorig);
}
Пример #5
0
/*
 * Binary search for extractkey in a table of fixed-width keys.
 *
 * keytab holds numofkeys entries; entry i is the NUL-terminated string
 * starting at keytab + 1 + i * (keysize + 1). The entries must be sorted
 * in strcmp() order.
 *
 * Returns the index of a matching entry, or numofkeys if there is none.
 *
 * The search window is the half-open range [left, right). This keeps all
 * index arithmetic free of unsigned underflow, so an empty table
 * (numofkeys == 0) and a key that sorts before the first entry are both
 * reported as "not found" instead of wrapping around to a huge index.
 */
static unsigned long searchfastaqueryindes(const char *extractkey,
                                           const char *keytab,
                                           unsigned long numofkeys,
                                           unsigned long keysize)
{
  unsigned long left = 0, right = numofkeys;

  while (left < right)
  {
    /* midpoint of the closed range [left, right - 1]; right > left here */
    const unsigned long mid = left + (right - left - 1) / 2;
    const int cmp = strcmp(extractkey,keytab + 1UL + mid * (keysize+1));

    if (cmp == 0)
    {
      return mid;
    }
    if (cmp < 0)
    {
      right = mid;
    } else
    {
      left = mid + 1;
    }
  }
  return numofkeys;
}
Пример #6
0
/*
 * Binary search over the Largecount entries in [left, right] (inclusive),
 * ordered by their idx member. Returns a pointer to an entry whose idx
 * equals key, or NULL if the search finds none.
 */
static /*@null@*/ const Largecount *binsearchLargecount(GtUword key,
                                                        const Largecount *left,
                                                        const Largecount *right)
{
  const Largecount *lo = left, *hi = right;

  while (lo <= hi)
  {
    const Largecount *probe = lo + GT_DIV2((GtUword) (hi - lo));

    if (key < probe->idx)
    {
      hi = probe - 1;
      continue;
    }
    if (key > probe->idx)
    {
      lo = probe + 1;
      continue;
    }
    return probe;
  }
  return NULL;
}
Пример #7
0
/*
 * Maps rank to a position using the Rankedbounds entries in
 * [leftptr, rightptr] (inclusive).
 *
 * Each entry covers the ranks
 *   [entry->rank, entry->rank + (entry->upperbound - entry->lowerbound)).
 * The entry covering rank is located by binary search and the result is
 *   entry->lowerbound + (rank - entry->rank).
 *
 * If no entry covers rank, a diagnostic is written to stderr and the
 * process exits with GT_EXIT_PROGRAMMING_ERROR. The diagnostic now names
 * this function (it used to say "fromrank2rank").
 */
GtUword gt_fromrank2pos(const Rankedbounds *leftptr,
                              const Rankedbounds *rightptr,
                              GtUword rank)
{
  const Rankedbounds *midptr;

  while (leftptr <= rightptr)
  {
    midptr = leftptr + GT_DIV2((GtUword) (rightptr-leftptr));
    if (rank < midptr->rank)
    {
      rightptr = midptr-1;
    } else
    {
      if (rank >= midptr->rank + (midptr->upperbound - midptr->lowerbound))
      {
        leftptr = midptr + 1;
      } else
      {
        return midptr->lowerbound + (rank - midptr->rank);
      }
    }
  }
  fprintf(stderr,"fromrank2pos: cannot find rank "GT_WU""
                 " in ranges",rank);
  exit(GT_EXIT_PROGRAMMING_ERROR);
  /* not reached; keeps splint quiet about the missing return */
  /*@ignore@*/
  return 0;
  /*@end@*/
}
Пример #8
0
/*
 * Maps specialpos to a rank using the Rankedbounds entries in
 * [leftptr, rightptr] (inclusive). The entry with
 *   lowerbound <= specialpos < upperbound
 * is located by binary search and the result is
 *   entry->rank + specialpos - entry->lowerbound.
 * If no entry contains specialpos, a diagnostic is written to stderr and
 * the process exits with GT_EXIT_PROGRAMMING_ERROR.
 */
GtUword gt_frompos2rank(const Rankedbounds *leftptr,
                              const Rankedbounds *rightptr,
                              GtUword specialpos)
{
  while (leftptr <= rightptr)
  {
    const Rankedbounds *probe
      = leftptr + GT_DIV2((GtUword) (rightptr-leftptr));

    if (specialpos < probe->lowerbound)
    {
      rightptr = probe - 1;
      continue;
    }
    if (specialpos < probe->upperbound)
    {
      /* probe->lowerbound <= specialpos < probe->upperbound */
      return probe->rank + specialpos - probe->lowerbound;
    }
    leftptr = probe + 1;
  }
  fprintf(stderr,"frompos2rank: cannot find pos "GT_WU""
                 " in ranges",specialpos);
  exit(GT_EXIT_PROGRAMMING_ERROR);
  /*@ignore@*/
  return 0;
  /*@end@*/
}
Пример #9
0
/*
 * Recursive select helper. The node under consideration occupies
 * node_size bits of we->c_bits starting at node_start and covers the
 * symbol range [alpha_start, alpha_end].
 *
 * Leaf (alpha_start >= alpha_end): returns i - 1 if i <= node_size,
 * otherwise ULONG_MAX.
 * Inner node: descends into the child chosen by comparing sym with the
 * middle of the symbol range, then maps the child's answer back into this
 * node with select_0/select_1 and makes it relative to node_start.
 * ULONG_MAX is returned if the child is empty or the child's answer is not
 * smaller than the child's size.
 */
static GtUword gt_wtree_encseq_select_rec(GtWtreeEncseq *we,
                                          GtUword i,
                                          GtWtreeSymbol sym,
                                          GtUword node_start,
                                          GtUword node_size,
                                          unsigned int alpha_start,
                                          unsigned int alpha_end)
{
  const unsigned int middle = GT_DIV2(alpha_start + alpha_end);
  int go_right;
  GtUword zeros_before = 0,
          ones_before = 0,
          zeros_in_node, child_start;

  if (alpha_start >= alpha_end) {
    return (i <= node_size) ? i - 1 : ULONG_MAX;
  }

  go_right = middle < (unsigned int) sym ? 1 : 0;

  /* number of 0-bits in this node = size of the left child */
  if (node_start != 0)
    zeros_before =
      gt_compressed_bitsequence_rank_0(we->c_bits, node_start - 1);
  zeros_in_node =
    gt_compressed_bitsequence_rank_0(we->c_bits, node_start + node_size - 1) -
    zeros_before;

  if (go_right) {
    if (node_start != 0)
      ones_before =
        gt_compressed_bitsequence_rank_1(we->c_bits, node_start - 1);
    alpha_start = middle + 1;
    node_size =
      gt_compressed_bitsequence_rank_1(we->c_bits,
                                       node_start + node_size - 1) -
      ones_before;
    child_start =
      node_start + we->parent_instance.members->length + zeros_in_node;
  }
  else {
    alpha_end = middle;
    child_start = node_start + we->parent_instance.members->length;
    node_size = zeros_in_node;
  }

  /* from here on node_size is the size of the chosen child */
  if (node_size == 0)
    return ULONG_MAX;

  i = gt_wtree_encseq_select_rec(we, i, sym, child_start, node_size,
                                 alpha_start, alpha_end);
  if (i >= node_size)
    return ULONG_MAX;

  if (go_right)
    return gt_compressed_bitsequence_select_1(we->c_bits,
                                              ones_before + i + 1) -
      node_start;
  return gt_compressed_bitsequence_select_0(we->c_bits,
                                            zeros_before + i + 1) -
    node_start;
}
Пример #10
0
/*
 * Walks from the root towards the node for sym, for at most level steps or
 * until the symbol range has shrunk to a single symbol. While walking it
 * updates we->node_start and we->current_fo, creating a missing child
 * fill-offset record whenever the child's symbol range still contains more
 * than one symbol.
 * Returns true if sym lies in the lower half (<= middle) of the symbol
 * range reached.
 */
static bool gt_wtree_encseq_set_nodestart_and_current_fo(GtWtreeEncseq *we,
                                                         unsigned int level,
                                                         GtWtreeSymbol sym)
{
  unsigned int alpha_start = 0,
               alpha_end = we->alpha_size - 1,
               middle,
               depth;

  middle = GT_DIV2(alpha_end);
  gt_assert(sym <= (GtWtreeSymbol) alpha_end);
  we->current_fo = we->root_fo;
  we->node_start = 0;

  for (depth = 0; depth < level && alpha_start < alpha_end; depth++) {
    if (sym > (GtWtreeSymbol) middle) {
      alpha_start = middle + 1;
      if (alpha_start < alpha_end &&
          we->current_fo->right == NULL)
        we->current_fo->right = gt_wtree_encseq_fill_offset_new();

      /* right child starts after the left child: skip its left_size bits */
      we->node_start =
        we->node_start + we->parent_instance.members->length +
        we->current_fo->left_size;
      we->current_fo = we->current_fo->right;
    }
    else {
      alpha_end = middle;
      if (alpha_start < alpha_end &&
          we->current_fo->left == NULL)
        we->current_fo->left = gt_wtree_encseq_fill_offset_new();

      we->node_start =
        we->node_start + we->parent_instance.members->length;
      we->current_fo = we->current_fo->left;
    }
    middle = GT_DIV2(alpha_start + alpha_end);
  }
  return (sym <= (GtWtreeSymbol) middle);
}
Пример #11
0
/*
 * Bisects condenseq->uniques by orig_startpos and returns the index of the
 * last entry whose orig_startpos is <= position. Returns
 * condenseq->udb_nelems if position lies before the first entry.
 * The search keeps the invariant that low is -1 or an index whose
 * orig_startpos is <= position, and high is udb_nelems or an index whose
 * orig_startpos is > position.
 */
GtUword gt_condenseq_uniques_position_binsearch(const GtCondenseq *condenseq,
                                                GtUword position)
{
  GtWord probe, low = (GtWord) -1, high;

  gt_assert(condenseq && condenseq->udb_nelems > 0);
  gt_safe_assign(high, condenseq->udb_nelems);

  while (high - low > (GtWord) 1) {
    probe = GT_DIV2(low + high);
    if (position < condenseq->uniques[probe].orig_startpos)
      high = probe;
    else
      low = probe;
  }
  probe = GT_DIV2(low + high);
  if (low > (GtWord) -1 &&
      condenseq->uniques[probe].orig_startpos <= position) {
    return (GtUword) probe;
  }
  return condenseq->udb_nelems;
}
Пример #12
0
/*
 * Recursive access helper. The node under consideration occupies
 * node_size bits of we->c_bits starting at node_start and covers the
 * symbol range [alpha_start, alpha_end]; pos is a position inside the node.
 *
 * While the range holds more than one symbol, the bit at pos selects the
 * child (0: lower half, otherwise: upper half), pos is converted to the
 * position inside that child via rank, and the search continues there.
 * Once the range has collapsed, alpha_start is returned as the symbol.
 */
static GtWtreeSymbol gt_wtree_encseq_access_rec(GtWtreeEncseq *we,
                                                GtUword pos,
                                                GtUword node_start,
                                                GtUword node_size,
                                                unsigned int alpha_start,
                                                unsigned int alpha_end)
{
  const unsigned int middle = GT_DIV2(alpha_start + alpha_end);
  int bit;
  GtUword zeros_before = 0,
          ones_before = 0,
          zeros_in_node;

  gt_assert(pos < node_size);

  if (alpha_start >= alpha_end)
    return (GtWtreeSymbol) alpha_start;

  bit = gt_compressed_bitsequence_access(we->c_bits, node_start + pos);

  /* number of 0-bits in this node = size of the left child */
  if (node_start != 0)
    zeros_before =
      gt_compressed_bitsequence_rank_0(we->c_bits, node_start - 1);
  zeros_in_node =
    gt_compressed_bitsequence_rank_0(we->c_bits, node_start + node_size - 1) -
    zeros_before;

  if (bit == 0) {
    /* rank is a count, so subtract one to obtain a position */
    pos = gt_compressed_bitsequence_rank_0(we->c_bits, node_start + pos) -
      zeros_before - 1;
    alpha_end = middle;
    node_start += we->parent_instance.members->length;
    node_size = zeros_in_node;
  }
  else {
    if (node_start != 0)
      ones_before =
        gt_compressed_bitsequence_rank_1(we->c_bits, node_start - 1);
    /* rank is a count, so subtract one to obtain a position */
    pos = gt_compressed_bitsequence_rank_1(we->c_bits, node_start + pos) -
      ones_before - 1;
    alpha_start = middle + 1;
    node_size =
      gt_compressed_bitsequence_rank_1(we->c_bits,
                                       node_start + node_size - 1) -
      ones_before;
    node_start +=
      we->parent_instance.members->length + zeros_in_node;
  }
  return gt_wtree_encseq_access_rec(we, pos, node_start,
                                    node_size, alpha_start, alpha_end);
}
Пример #13
0
static bool gt_intset_16_binarysearch_is_member(const uint16_t *leftptr,
                                                const uint16_t *rightptr,
                                                uint16_t elem)
{
  const uint16_t *midptr;
    while (leftptr <= rightptr) {
      midptr = leftptr + (GtUword) GT_DIV2(rightptr - leftptr);
      if (elem < *midptr) {
        rightptr = midptr - 1;
      }
      else {
        if (elem > *midptr)
          leftptr = midptr + 1;
        else
          return true;
      }
    }
  return false;
}
Пример #14
0
static GtUword gt_intset_16_binarysearch_idx_sm_geq(const uint16_t *leftptr,
                                                    const uint16_t *rightptr,
                                                    uint16_t value)
{
  const uint16_t *midptr = NULL,
        *leftorig = leftptr;

  gt_assert(leftptr <= rightptr);
  if (value <= *leftptr)
    return 0;
  if (value > *rightptr)
    return 1UL + (GtUword) (rightptr - leftptr);
  while (leftptr < rightptr) {
    midptr = leftptr + (GtUword) GT_DIV2(rightptr - leftptr);
    if (value <= *midptr)
      rightptr = midptr;
    else {
      leftptr = midptr + 1;
    }
  }
  return (GtUword) (leftptr - leftorig);
}
Пример #15
0
/*
 * Fills the derived size and mask fields of fm from the given parameters:
 * block and superblock sizes (powers of two derived from log2bsize), the
 * mark distance (power of two derived from log2markdist), the matching bit
 * masks and block counts for bwtlength, the map size (numofchars + 1), the
 * number of codes for suffixlength, and finally the index size as computed
 * by determinefmindexsize(). fm->mappedptr is reset to NULL.
 */
void gt_computefmkeyvalues (Fmindex *fm,
                            const GtSpecialcharinfo *specialcharinfo,
                            GtUword bwtlength,
                            unsigned int log2bsize,
                            unsigned int log2markdist,
                            unsigned int numofchars,
                            unsigned int suffixlength,
                            bool storeindexpos)
{
  /* values taken over from the parameters */
  fm->mappedptr = NULL;
  fm->bwtlength = bwtlength;
  fm->log2bsize = log2bsize;
  fm->log2markdist = log2markdist;
  fm->suffixlength = suffixlength;
  fm->mapsize = numofchars+1;

  /* blocks */
  fm->bsize = (unsigned int) GT_POW2 (fm->log2bsize);
  fm->bsizehalve = GT_DIV2(fm->bsize);
  fm->negatebsizeones = ~ (GtUword) (fm->bsize - 1);
  fm->nofblocks = (GtUword) (fm->bwtlength / fm->bsize) + 1;

  /* superblocks: log2 of their size is twice that of a block */
  fm->log2superbsize = GT_MULT2 (fm->log2bsize);
  fm->log2superbsizeminuslog2bsize = fm->log2superbsize - fm->log2bsize;
  fm->superbsize = (unsigned int) GT_POW2 (fm->log2superbsize);
  fm->negatesuperbsizeones = ~ (GtUword) (fm->superbsize - 1);
  fm->nofsuperblocks = (GtUword) (fm->bwtlength / fm->superbsize) + 2;

  /* marked positions */
  fm->markdist = (GtUword) GT_POW2 (fm->log2markdist);
  fm->markdistminus1 = (GtUword) (fm->markdist - 1);

  /* number of codes is only defined for a positive suffix length */
  fm->numofcodes = (fm->suffixlength > 0)
                     ? gt_power_for_small_exponents(fm->mapsize-1,
                                                    fm->suffixlength)
                     : 0;

  fm->sizeofindex = determinefmindexsize (fm,
                                          specialcharinfo,
                                          suffixlength,
                                          storeindexpos);
}
Пример #16
0
/*
 * Binary search for extractkey among the fastakey members of
 * fastakeyqueries[0..numofqueries-1] (compared with strcmp()). When the
 * probed entry matches but its predecessor matches too, the search
 * continues to the left, so the index returned is that of a matching entry
 * whose predecessor does not match. Returns numofqueries if no entry
 * matches.
 */
static unsigned long searchdesinfastakeyqueries(const char *extractkey,
                                                const Fastakeyquery
                                                  *fastakeyqueries,
                                                unsigned long numofqueries)
{
  const Fastakeyquery *lo = fastakeyqueries,
                      *hi = fastakeyqueries + numofqueries - 1;

  while (lo <= hi)
  {
    const Fastakeyquery *probe = lo + GT_DIV2((unsigned long) (hi-lo));
    const int cmp = strcmp(extractkey,probe->fastakey);

    if (cmp > 0)
    {
      lo = probe + 1;
      continue;
    }
    if (cmp == 0 &&
        !(probe > fastakeyqueries &&
          strcmp(extractkey,(probe-1)->fastakey) == 0))
    {
      /* match with no matching predecessor */
      return (unsigned long) (probe - fastakeyqueries);
    }
    /* key is smaller, or an earlier entry matches as well */
    hi = probe - 1;
  }
  return numofqueries;
}
/*
 * Tool runner: builds a bit table of reads with one of several methods,
 * selected by arguments->test, and prints it with gt_cntlist_show().
 *
 *  - SHOWLIST:        parse the list file <readset> + cntlist suffix.
 *  - BRUTEFORCE, KMP: load the reads as an encoded sequence (mirrored
 *                     unless singlestrand is set) and run
 *                     gt_rdj_pairwise_exact() in GT_OVLFIND_CNT mode.
 *  - ESA:             open a sequential suffix array reader on the readset
 *                     and run gt_contfind_bottomup().
 *
 * Returns 0 on success and a non-zero value if loading/parsing or the
 * final output step failed.
 */
static int gt_readjoiner_cnttest_runner(GT_UNUSED int argc,
    GT_UNUSED const char **argv, GT_UNUSED int parsed_args,
    void *tool_arguments, GT_UNUSED GtError *err)
{
  GtReadjoinerCnttestArguments *arguments = tool_arguments;
  GtEncseqLoader *el = NULL;
  GtEncseq *reads = NULL;
  GtBitsequence *bits = NULL;
  /* initialised so that the final gt_cntlist_show() never reads an
     indeterminate value, even if the assert-only branch below is compiled
     out */
  GtUword nofreads = 0;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments);

  if (arguments->test == GT_READJOINER_CNTTEST_SHOWLIST)
  {
    GtStr *fn = NULL;
    fn = gt_str_clone(arguments->readset);
    gt_str_append_cstr(fn, GT_READJOINER_SUFFIX_CNTLIST);
    had_err = gt_cntlist_parse(gt_str_get(fn), true, &bits, &nofreads, err);
    gt_str_delete(fn);
  }
  else if (arguments->test == GT_READJOINER_CNTTEST_BRUTEFORCE ||
      arguments->test == GT_READJOINER_CNTTEST_KMP)
  {
    el = gt_encseq_loader_new();
    gt_encseq_loader_drop_description_support(el);
    gt_encseq_loader_disable_autosupport(el);
    if (!arguments->singlestrand)
      gt_encseq_loader_mirror(el);
    reads = gt_encseq_loader_load(el, gt_str_get(arguments->readset), err);
    if (reads == NULL)
      had_err = -1;
    else
    {
      gt_rdj_pairwise_exact(GT_OVLFIND_CNT, reads, !arguments->singlestrand,
          false, arguments->test == GT_READJOINER_CNTTEST_KMP, 1UL, true,
          NULL, NULL, false, NULL, &bits, &nofreads);
    }
    gt_encseq_delete(reads);
    gt_encseq_loader_delete(el);
  }
  else if (arguments->test == GT_READJOINER_CNTTEST_ESA)
  {
    Sequentialsuffixarrayreader *ssar = NULL;
    GtUword readlength = 0, firstrevcompl = 0;
    GtLogger *verbose_logger = gt_logger_new(arguments->verbose,
        GT_LOGGER_DEFLT_PREFIX, stderr);
    ssar = gt_newSequentialsuffixarrayreaderfromfile(gt_str_get(
          arguments->readset), SARR_LCPTAB | SARR_SUFTAB | SARR_SSPTAB,
        true, verbose_logger, err);
    if (gt_error_is_set(err))
      had_err = -1;
    else
    {
      nofreads = gt_encseq_num_of_sequences(ssar->encseq);
      if (!arguments->singlestrand)
      {
        /* the second half of the sequences are the reverse complements */
        nofreads = GT_DIV2(nofreads);
        firstrevcompl = nofreads;
      }
      GT_INITBITTAB(bits, nofreads);
      /* readlength stays 0 unless both conditions hold */
      if (!arguments->singlestrand &&
          gt_encseq_accesstype_get(ssar->encseq) == GT_ACCESS_TYPE_EQUALLENGTH)
      {
        readlength = gt_encseq_seqlength(ssar->encseq, 0);
      }
      (void)gt_contfind_bottomup(ssar, false, bits, arguments->singlestrand ? 0
          : firstrevcompl, readlength);
    }
    if (ssar != NULL)
      gt_freeSequentialsuffixarrayreader(&ssar);
    gt_logger_delete(verbose_logger);
  }
  else
  {
    /* unknown test kind */
    gt_assert(false);
  }
  if (!had_err)
    had_err = gt_cntlist_show(bits, nofreads, NULL, false, err);
  gt_free(bits);
  return had_err;
}
Пример #18
0
/*
 * Narrows the suffix-array interval *lcpitv in two passes: the first pass
 * may raise lcpitv->left, the second may lower lcpitv->right. Each pass
 * compares the boundary suffixes first and only bisects when needed.
 * Returns false if a boundary comparison shows the interval cannot be
 * narrowed to a match (retcode > 0 at the right end in pass one, or
 * retcode < 0 at the left end in pass two); otherwise returns true with
 * *lcpitv updated.
 *
 * All comparisons go through GT_MMSEARCH_COMPARE, which is defined outside
 * this function. Judging from its use here it sets retcode and updates the
 * length variable passed as second argument; presumably it also uses
 * dbencseq, esr, readmode, querysubstring, minmatchlength, totallength,
 * sidx, currentdbchar and currentquerychar from this scope, since none of
 * them is referenced directly below -- TODO confirm against the macro.
 */
static bool gt_mmsearch(const GtEncseq *dbencseq,
                        GtEncseqReader *esr,
                        const ESASuffixptr *suftab,
                        GtReadmode readmode,
                        Lcpinterval *lcpitv,
                        const GtQuerysubstring *querysubstring,
                        GtUword minmatchlength)
{
  GtUword left, leftsave, mid, right, lpref, rpref,
                totallength, lcplen, sidx;
  int retcode = 0;
  GtUchar currentdbchar, currentquerychar;

  totallength = gt_encseq_total_length(dbencseq);
  leftsave = left = lcpitv->left;
  right = lcpitv->right;
  lcplen = lcpitv->offset;
  /* pass one: find the new left boundary */
  GT_MMSEARCH_COMPARE(ESASUFFIXPTRGET(suftab,left),lcplen);
  if (retcode > 0)
  {
    lpref = lcplen;
    lcplen = lcpitv->offset;
    GT_MMSEARCH_COMPARE(ESASUFFIXPTRGET(suftab,right),lcplen);
    if (retcode > 0)
    {
      return false;
    } else
    {
      rpref = lcplen;
      /* bisect until left and right are adjacent; each comparison starts
         from the smaller of the two lengths recorded at the borders */
      while (right > left + 1)
      {
        mid = GT_DIV2(left+right);
        lcplen = MIN(lpref,rpref);
        GT_MMSEARCH_COMPARE(ESASUFFIXPTRGET(suftab,mid),lcplen);
        if (retcode <= 0)
        {
          right = mid;
          rpref = lcplen;
        } else
        {
          left = mid;
          lpref = lcplen;
        }
      }
      lcpitv->left = right;
    }
  }

  /* pass two: find the new right boundary; note that it restarts from the
     original left boundary (leftsave), not from the updated lcpitv->left */
  left = leftsave;
  right = lcpitv->right;
  lcplen = lcpitv->offset;
  GT_MMSEARCH_COMPARE(ESASUFFIXPTRGET(suftab,left),lcplen);
  if (retcode < 0)
  {
    return false;
  } else
  {
    lpref = lcplen;
    lcplen = lcpitv->offset;
    GT_MMSEARCH_COMPARE(ESASUFFIXPTRGET(suftab,right),lcplen);
    if (retcode >= 0)
    {
      /* right boundary stays as it is */
      lcpitv->right = right;
    } else
    {
      rpref = lcplen;
      while (right > left + 1)
      {
        mid = GT_DIV2(left+right);
        lcplen = MIN(lpref,rpref);
        GT_MMSEARCH_COMPARE(ESASUFFIXPTRGET(suftab,mid),lcplen);
        if (retcode >= 0)
        {
          left = mid;
          lpref = lcplen;
        } else
        {
          right = mid;
          rpref = lcplen;
        }
      }
      lcpitv->right = left;
    }
  }
  return true;
}
Пример #19
0
/*
 * Computes one value per sequence of a DNA encseq and returns them in an
 * array obtained from gt_calloc() (presumably to be freed by the caller --
 * TODO confirm).
 *
 * The sequence is scanned once in forward direction; for every sequence
 * the number of a/t characters and the number of c/g characters is
 * counted. At each sequence end the result for that sequence is stored:
 *  - calculate == true:  calculate_gc() (defined elsewhere) is called with
 *                        both counts and with_special and stores the value;
 *  - calculate == false: the raw c/g count is stored.
 *
 * For a mirrored encseq the array has one slot per sequence including the
 * mirrored ones, and the second half is finally filled with the values of
 * the first half in reverse order.
 */
double *gt_encseq_get_gc(const GtEncseq *encseq,
                         bool with_special,
                         bool calculate,
                         GT_UNUSED GtError *err)
{
  GtEncseqReader *reader;
  GtAlphabet *alphabet;
  double *gc_content;
  /* max_unit is the number of sequences, not counting mirrored ones */
  unsigned long char_idx, totallength, max_unit,
                seq_idx = 0,
                nextsep = 0,
                at_count = 0,
                gc_count = 0;
  bool is_mirrored_encseq;
  GtUchar acgt[8], current_c;

  alphabet = gt_encseq_alphabet(encseq);
  gt_assert(gt_alphabet_is_dna(alphabet));
  /* acgt[0..3] hold the codes of a/A/t/T, acgt[4..7] those of c/C/g/G */
  gt_alphabet_encode_seq(alphabet, acgt,
                         "aAtTcCgG", 8UL);
  totallength = gt_encseq_total_length(encseq);
  reader = gt_encseq_create_reader_with_readmode(encseq,
                                                 GT_READMODE_FORWARD,
                                                 0);
  is_mirrored_encseq = gt_encseq_is_mirrored(encseq);
  if (is_mirrored_encseq)
  {
    max_unit = GT_DIV2(gt_encseq_num_of_sequences(encseq));
    gc_content = gt_calloc((size_t) GT_MULT2(max_unit), sizeof (double));
  }
  else
  {
    max_unit = gt_encseq_num_of_sequences(encseq);
    gc_content = gt_calloc((size_t) max_unit, sizeof (double));
  }

  /* position directly after the last character of the current sequence */
  nextsep = gt_encseq_seqstartpos(encseq, seq_idx) +
            gt_encseq_seqlength(encseq, seq_idx);

  for (char_idx = 0; char_idx < totallength; char_idx++)
  {
    if (nextsep == char_idx)
    {
      /* end of the current sequence: store its result */
      if (calculate)
      {
        calculate_gc(encseq,
                     gc_content,
                     with_special,
                     seq_idx,
                     gc_count,
                     at_count);
      }
      else
      {
        gc_content[seq_idx] = (double) gc_count;
      }

      seq_idx++;

      nextsep = gt_encseq_seqstartpos(encseq, seq_idx) +
                gt_encseq_seqlength(encseq, seq_idx);

      /* the character at char_idx is not read; continue right after it */
      gt_encseq_reader_reinit_with_readmode(reader,
                                            encseq,
                                            GT_READMODE_FORWARD,
                                            char_idx + 1UL);
      gc_count = at_count = 0UL;
      continue;
    }
    current_c = gt_encseq_reader_next_encoded_char(reader);
    if (current_c == acgt[0] ||
        current_c == acgt[1] ||
        current_c == acgt[2] ||
        current_c == acgt[3])
    {
      at_count++;
    }
    else if (current_c == acgt[4] ||
             current_c == acgt[5] ||
             current_c == acgt[6] ||
             current_c == acgt[7])
    {
      gc_count++;
    }
    /* all other characters are counted neither as a/t nor as c/g */
  }
  /* store the result of the last sequence */
  if (calculate)
  {
    calculate_gc(encseq,
                 gc_content,
                 with_special,
                 seq_idx,
                 gc_count,
                 at_count);
  }
  else
  {
    gc_content[seq_idx] = (double) gc_count;
  }
  gt_encseq_reader_delete(reader);
  if (is_mirrored_encseq)
  {
    unsigned long double_max_unit = GT_MULT2(max_unit);
    /* slot (2 * max_unit - 1 - i) receives the value of slot i */
    for (seq_idx = 0; seq_idx < max_unit; seq_idx++)
    {
      gc_content[double_max_unit - seq_idx - 1] =
        gc_content[seq_idx];
    }
  }
  return gc_content;
}
Пример #20
0
/*
 * Unit test for the 16-bit integer set.
 *
 * Builds a random strictly increasing array arr of num_of_elems values,
 * adds all of them to a new set and checks via gt_ensure() that element
 * access and the "index of smallest element >= x" queries agree with arr,
 * and that values between consecutive elements are reported as absent and
 * are mapped to the expected sequence number. The set is only built if its
 * representation size is below UINT_MAX.
 *
 * Returns 0 on success, a non-zero value if a check failed.
 */
int gt_intset_16_unit_test(GtError *err)
{
  int had_err = 0;
  GtIntset *is;
  /* NOTE(review): "/" binds tighter than "<<", so the stepsize expression
     is num_of_elems << (4 / num_of_elems); possibly
     (num_of_elems << 4) / num_of_elems was intended -- left unchanged */
  GtUword num_of_elems = gt_rand_max(((GtUword) 1) << 10) + 1,
          *arr = gt_malloc(sizeof (*arr) * num_of_elems),
          stepsize = GT_DIV2(num_of_elems <<4 / num_of_elems),
          idx;
  size_t is_size;

  gt_error_check(err);

  /* strictly increasing values, starting at 1 or above */
  arr[0] = gt_rand_max(stepsize) + 1;
  for (idx = (GtUword) 1; idx < num_of_elems; ++idx) {
    arr[idx] = arr[idx - 1] + gt_rand_max(stepsize) + 1;
  }

  is_size =     gt_intset_16_size_of_rep(arr[num_of_elems - 1], num_of_elems);

  if (!had_err) {
    if (is_size < (size_t) UINT_MAX) {
      is = gt_intset_16_new(arr[num_of_elems - 1], num_of_elems);
      for (idx = 0; idx < num_of_elems; idx++) {
        gt_intset_16_add(is, arr[idx]);
        gt_ensure(idx + 1 == gt_intset_16_size(is));
        if (idx < num_of_elems - 1)
          gt_ensure(gt_intset_16_get_idx_smallest_geq(is,
                                                      arr[idx] + 1) ==
                    num_of_elems);
      }

      gt_ensure(gt_intset_16_elems_is_valid(is));
      gt_ensure(gt_intset_16_secstart_is_valid(is));

      for (idx = 0; !had_err && idx < num_of_elems; idx++) {
        /* arr[idx] - 1 must map to idx unless it is itself an element.
           The first element has no predecessor, so arr[idx - 1] must not
           be read for idx == 0. */
        if (arr[idx] != 0 &&
            (idx == 0 || arr[idx - 1] != (arr[idx] - 1))) {
          gt_ensure(
            gt_intset_16_get_idx_smallest_geq_test(is, arr[idx] - 1) ==
            idx);
          gt_ensure(
            gt_intset_16_get_idx_smallest_geq(is, arr[idx] - 1) ==
            idx);
        }
        gt_ensure(gt_intset_16_get_test(is, idx) == arr[idx]);
        gt_ensure(gt_intset_16_get(is, idx) == arr[idx]);
        gt_ensure(
          gt_intset_16_get_idx_smallest_geq_test(is, arr[idx] + 1) ==
          idx + 1);
        gt_ensure(
          gt_intset_16_get_idx_smallest_geq(is, arr[idx] + 1) ==
          idx + 1);
      }
      /* values below the first element */
      if (!had_err)
        had_err = gt_intset_unit_test_notinset(is, 0, arr[0] - 1, err);
      if (!had_err)
        had_err = gt_intset_unit_test_check_seqnum(is, 0, arr[0] - 1, 0, err);
      /* values strictly between consecutive elements */
      for (idx = (GtUword) 1; !had_err && idx < num_of_elems; idx++) {
        had_err = gt_intset_unit_test_notinset(is, arr[idx - 1] + 1,
                                               arr[idx] - 1, err);
        if (!had_err)
          had_err = gt_intset_unit_test_check_seqnum(is, arr[idx - 1] + 1,
                                                     arr[idx] - 1, idx, err);
      }
      gt_intset_delete(is);
    }
  }
  gt_free(arr);
  return had_err;
}
Пример #21
0
/*
 * Computes an alignment of useq[ustart..ustart+ulen-1] and
 * vseq[vstart..vstart+vlen-1], stores it in align and returns its distance.
 *
 * Trivial inputs (ulen == 0 or vlen == 0) yield a pure insertion or
 * deletion alignment. For vlen == 1, or if
 * gt_linspaceManagement_checksquare() accepts the sizes, the alignment is
 * computed by alignment_in_square_space_generic(). Otherwise crosspoints
 * are evaluated recursively into Ctab and the alignment is reconstructed
 * from them.
 */
GtUword gt_calc_linearalign(LinspaceManagement *spacemanager,
                            GtScoreHandler *scorehandler,
                            GtAlignment *align,
                            const GtUchar *useq,
                            GtUword ustart,
                            GtUword ulen,
                            const GtUchar *vseq,
                            GtUword vstart,
                            GtUword vlen)
{
  GtUword distance, gapcost, *Ctab, threadcount = 1;

  gt_assert(scorehandler);
  gt_linspaceManagement_set_ulen(spacemanager,ulen);
  gapcost = gt_scorehandler_get_gapscore(scorehandler);

  if (ulen == 0UL)
  {
    return construct_trivial_insertion_alignment(align,vlen,gapcost);
  }
  if (vlen == 0UL)
  {
    return construct_trivial_deletion_alignment(align,ulen,gapcost);
  }
  if (vlen == 1UL)
  {
    /* the second element size is that of a pointer to GtUword */
    gt_linspaceManagement_check(spacemanager,(ulen+1)*(vlen+1)-1,ulen,
                                sizeof (GtUword), sizeof (GtUword *), 0);
    return alignment_in_square_space_generic(spacemanager, align,
                                             useq, ustart, ulen,
                                             vseq, vstart, vlen, scorehandler);
  }
  if (gt_linspaceManagement_checksquare(spacemanager, ulen, vlen,
                                        sizeof (GtUword),
                                        sizeof (GtUword)))
  { /* call 2dim */
    return alignment_in_square_space_generic(spacemanager, align,
                                             useq, ustart, ulen,
                                             vseq, vstart, vlen, scorehandler);
  }

  /* linear-space case */
#ifdef GT_THREADS_ENABLED
  gt_linspaceManagement_check(spacemanager, ulen + GT_DIV2(vlen), vlen,
                              sizeof (GtUword), sizeof (GtUword),
                              sizeof (GtUword));
#else
  gt_linspaceManagement_check(spacemanager, ulen, vlen, sizeof (GtUword),
                              sizeof (GtUword), sizeof (GtUword));
#endif
  Ctab = gt_linspaceManagement_get_crosspointTabspace(spacemanager);

  Ctab[vlen] = ulen;
  distance = evaluatelinearcrosspoints(spacemanager, scorehandler,
                                       useq, ustart, ulen,
                                       vseq, vstart, vlen,
                                       Ctab, 0, 0, &threadcount);

  determineCtab0(Ctab, scorehandler, vseq[vstart], useq, ustart);
  reconstructalignment_from_Ctab(align, Ctab, useq, ustart, vseq, vstart,
                                 vlen, scorehandler);

  return distance;
}
Пример #22
0
/* evaluate crosspoints in recursive way */
static GtUword evaluatelinearcrosspoints(LinspaceManagement *spacemanager,
                                         GtScoreHandler *scorehandler,
                                         const GtUchar *useq,
                                         GtUword ustart, GtUword ulen,
                                         const GtUchar *vseq,
                                         GtUword vstart, GtUword vlen,
                                         GtUword *Ctab,
                                         GtUword rowoffset,
                                         GT_UNUSED GtUword threadidx,
                                         GT_UNUSED GtUword *threadcount)
{
  GtUword midrow, midcol, distance, *EDtabcolumn = NULL, *Rtabcolumn = NULL;
#ifdef GT_THREADS_ENABLED
  GtThread *t1 = NULL, *t2 = NULL;
  GtLinearCrosspointthreadinfo threadinfo1, threadinfo2;
#endif

  if (vlen >= 2UL)
  {
    if (ulen == 0)
    {
      GtUword i;
      for (i = 0; i <= vlen; i++)
        Ctab[i] = rowoffset;
      return rowoffset;
    }
#ifndef GT_THREADS_ENABLED
    if (gt_linspaceManagement_checksquare(spacemanager, ulen,vlen,
                                               sizeof (GtUword),
                                               sizeof (Rtabcolumn)))
    { /* product of subsquences is lower than space allocated already or
       * lower than timesquarfactor * ulen*/
      return ctab_in_square_space(spacemanager, scorehandler, Ctab, useq,
                                  ustart, ulen, vseq, vstart, vlen, rowoffset);
    }
#endif

    midcol = GT_DIV2(vlen);
    Rtabcolumn = gt_linspaceManagement_get_rTabspace(spacemanager);
    EDtabcolumn = gt_linspaceManagement_get_valueTabspace(spacemanager);
    Rtabcolumn = Rtabcolumn + rowoffset + threadidx;
    EDtabcolumn = EDtabcolumn + rowoffset + threadidx;

    distance = evaluateallEDtabRtabcolumns(EDtabcolumn, Rtabcolumn,
                                           scorehandler, midcol,
                                           useq, ustart, ulen,
                                           vseq, vstart, vlen);
    midrow = Rtabcolumn[ulen];
    Ctab[midcol] = rowoffset + midrow;

#ifdef GT_THREADS_ENABLED
    if (*threadcount + 2 > gt_jobs)
    {
#endif
      /* upper left corner */
      (void) evaluatelinearcrosspoints(spacemanager, scorehandler,
                                       useq, ustart, midrow,
                                       vseq, vstart, midcol,
                                       Ctab, rowoffset,
                                       threadidx, threadcount);

      /* bottom right corner */
      (void) evaluatelinearcrosspoints(spacemanager, scorehandler,
                                       useq, ustart + midrow,
                                       ulen - midrow,
                                       vseq, vstart + midcol,
                                       vlen - midcol,
                                       Ctab + midcol,
                                       rowoffset + midrow,
                                       threadidx, threadcount);
#ifdef GT_THREADS_ENABLED
    }
    else
    {
      threadinfo1 = set_LinearCrosspointthreadinfo(spacemanager, scorehandler,
                                                   useq, ustart, midrow,
                                                   vseq, vstart, midcol,
                                                   Ctab, rowoffset,
                                                   threadidx, threadcount);
      (*threadcount)++;
      t1 = gt_thread_new(evaluatelinearcrosspoints_thread_caller,
                         &threadinfo1, NULL);

      threadinfo2 = set_LinearCrosspointthreadinfo(spacemanager, scorehandler,
                                                   useq, ustart + midrow,
                                                   ulen - midrow,
                                                   vseq, vstart + midcol,
                                                   vlen - midcol,
                                                   Ctab + midcol,
                                                   rowoffset + midrow,
                                                   threadidx + GT_DIV2(midcol),
                                                   threadcount);
      (*threadcount)++;
      t2 = gt_thread_new(evaluatelinearcrosspoints_thread_caller,
                         &threadinfo2, NULL);

      gt_thread_join(t1);
      (*threadcount)--;
      gt_thread_join(t2);
      (*threadcount)--;
      gt_thread_delete(t1);
      gt_thread_delete(t2);
    }
#endif
    return distance;
  }
  return 0;
}
Пример #23
0
/*
 * Build a new handler holding costs derived from the scores in <scorehandler>.
 *
 * Without a score matrix, a shift value is computed from the match, mismatch
 * and gap extension scores. Match and mismatch costs become
 * 2 * shift - score, the gap extension cost becomes shift - gap_extension and
 * the gap opening cost is the negated gap opening score. If the source handler
 * does not have its mappedsequence flag set, gt_scorehandler_plain() is
 * applied to the result as well.
 *
 * With a score matrix, the shift is derived from the largest matrix entry
 * (never taken below 0) and the gap extension score. Every entry is
 * translated to 2 * shift - score in an empty clone of the matrix, and that
 * clone is attached to a handler created with match/mismatch values of 0.
 *
 * <scorehandler> must not be NULL. The returned handler is newly created;
 * presumably the caller has to release it (confirm against the
 * gt_scorehandler API).
 */
GtScoreHandler *gt_scorehandler2costhandler(const GtScoreHandler *scorehandler)
{
  GtScoreHandler *matrixhandler;
  GtScoreMatrix *costmatrix;
  GtWord ext_cost, open_cost;
  unsigned int row, col, dim;
  int shift = 0;

  gt_assert(scorehandler != NULL);

  /* simple case: only scalar match/mismatch/gap scores are present */
  if (scorehandler->scorematrix == NULL)
  {
    GtScoreHandler *plainhandler;
    GtWord match_cost, mismatch_cost, gapext_cost, gapopen_cost,
           base = MAX(MAX(GT_DIV2(scorehandler->matchscore+1),
                          GT_DIV2(scorehandler->mismatchscore+1)),
                      MAX(1 + scorehandler->gap_extension,0));

    match_cost = 2 * base - scorehandler->matchscore;
    mismatch_cost = 2 * base - scorehandler->mismatchscore;
    gapext_cost = base - scorehandler->gap_extension;
    gapopen_cost = -scorehandler->gap_opening;
    plainhandler = gt_scorehandler_new(match_cost,
                                       mismatch_cost,
                                       gapopen_cost,
                                       gapext_cost);
    if (!scorehandler->mappedsequence)
    {
      gt_scorehandler_plain(plainhandler);
    }
    return plainhandler;
  }

  /* matrix case: translate every entry of the score matrix */
  dim = gt_score_matrix_get_dimension(scorehandler->scorematrix);
  costmatrix = gt_score_matrix_clone_empty(scorehandler->scorematrix);

  /* first pass: largest entry of the matrix, starting from 0 */
  for (row = 0; row < dim; row++)
  {
    for (col = 0; col < dim; col++)
    {
      const int entry
        = gt_score_matrix_get_score(scorehandler->scorematrix, row, col);

      if (shift < entry)
      {
        shift = entry;
      }
    }
  }
  shift = MAX(GT_DIV2(shift+1), 1 + scorehandler->gap_extension);

  /* second pass: store the shifted values in the cloned matrix */
  for (row = 0; row < dim; row++)
  {
    for (col = 0; col < dim; col++)
    {
      const int entry
        = gt_score_matrix_get_score(scorehandler->scorematrix, row, col);

      gt_score_matrix_set_score(costmatrix, row, col, 2 * shift - entry);
    }
  }
  ext_cost = shift - scorehandler->gap_extension;
  open_cost = -scorehandler->gap_opening;
  matrixhandler = gt_scorehandler_new(0, 0, open_cost, ext_cost);
  gt_scorehandler_add_scorematrix(matrixhandler, costmatrix);
  return matrixhandler;
}
Пример #24
0
/*
 * Rank query for symbol <sym> at position <pos>, starting in the node that
 * begins at bit <node_start> and spans <node_size> bits, and whose symbols
 * lie in the alphabet range <alpha_start>..<alpha_end>.
 *
 * At each step the alphabet range is split at its middle. Symbols up to and
 * including the middle go to the 0-child, larger ones to the 1-child. The
 * rank of the matching bit inside the current node gives the position in the
 * chosen child. The descent stops with 0 as soon as the chosen child is empty
 * or that rank is 0. Once the alphabet range cannot be split any further, the
 * current position is converted to a count (pos + 1) and returned.
 *
 * The descent is written as a loop; each pass corresponds to one node and
 * <pos> must be smaller than the size of that node.
 */
static GtUword gt_wtree_encseq_rank_rec(GtWtreeEncseq *we,
                                        GtUword pos,
                                        GtWtreeSymbol sym,
                                        GtUword node_start,
                                        GtUword node_size,
                                        unsigned int alpha_start,
                                        unsigned int alpha_end)
{
  for (;;)
  {
    const unsigned int middle = GT_DIV2(alpha_start + alpha_end);
    /* number of 0/1 bits in front of the current node; 0 for a node at 0 */
    GtUword zeros_before = 0,
            ones_before = 0,
            left_child_size,
            rank;
    int bit;

    gt_log_log("alphabet: %u-%u-%u, sym: " GT_WU,
               alpha_start, middle, alpha_end, (GtUword) sym);
    gt_log_log("pos: " GT_WU, pos);
    gt_assert(pos < node_size);

    if (alpha_start >= alpha_end)
    {
      /* alphabet range cannot be split any further */
      break;
    }

    bit = (middle < (unsigned int) sym) ? 1 : 0;
    if (node_start != 0)
    {
      zeros_before =
        gt_compressed_bitsequence_rank_0(we->c_bits, node_start - 1);
    }
    /* the 0-bits of this node form its left child */
    left_child_size =
      gt_compressed_bitsequence_rank_0(we->c_bits,
                                       node_start + node_size - 1) -
      zeros_before;

    if (bit != 0)
    {
      if (node_start != 0)
      {
        ones_before =
          gt_compressed_bitsequence_rank_1(we->c_bits, node_start - 1);
      }
      rank = gt_compressed_bitsequence_rank_1(we->c_bits, node_start + pos) -
             ones_before;
      alpha_start = middle + 1;
      node_size =
        gt_compressed_bitsequence_rank_1(we->c_bits,
                                         node_start + node_size - 1) -
        ones_before;
      /* the right child starts behind the left child */
      node_start += we->parent_instance.members->length + left_child_size;
    }
    else
    {
      rank = gt_compressed_bitsequence_rank_0(we->c_bits, node_start + pos) -
             zeros_before;
      alpha_end = middle;
      node_start += we->parent_instance.members->length;
      node_size = left_child_size;
    }
    gt_log_log("bit: %d, nodesize: " GT_WU, bit, node_size);

    if (node_size == 0 || rank == 0)
    {
      return 0;
    }
    pos = rank - 1; /* convert count to position in the child */
  }
  gt_log_log("found: rank=" GT_WU, pos + 1);
  return pos + 1; /* convert position to count */
}