Exemplo n.º 1
0
/*
  Print tables of small integer powers to stdout, one line per exponent.
  Every value is obtained from gt_power_for_small_exponents(). The tables
  cover base 2 (exponents 1..63), base 4 (1..31), base 8 (1..15) and
  base 3 (1..31), in that order.
*/
void gt_out_power_for_small_exponents(void)
{
  unsigned int e;

  /* base 2, exponents 1..63 */
  e = 1U;
  while (e < 64U)
  {
    printf("pow(2UL,%u)=%lu\n",e,gt_power_for_small_exponents(2U,e));
    e++;
  }

  /* base 4, exponents 1..31 */
  e = 1U;
  while (e < 32U)
  {
    printf("pow(4UL,%u)=%lu\n",e,gt_power_for_small_exponents(4U,e));
    e++;
  }

  /* base 8, exponents 1..15 */
  e = 1U;
  while (e < 16U)
  {
    printf("pow(8UL,%u)=%lu\n",e,gt_power_for_small_exponents(8U,e));
    e++;
  }

  /* base 3, exponents 1..31 */
  e = 1U;
  while (e < 32U)
  {
    printf("pow(3UL,%u)=%lu\n",e,gt_power_for_small_exponents(3U,e));
    e++;
  }
}
Exemplo n.º 2
0
/*
  Fill the bucketend fields of bucketspec2->subbuckettab and
  bucketspec2->superbuckettab from the bucket table bcktab.

  Before the main loop, expandfactor is set to
  numofchars^(prefixlength-2) and expandfillsum to the value delivered by
  gt_bcktab_filltable(bcktab,2U). Then every two-character code in
  [0, numofcharssquared) is expanded with expandtwocharcode(), and the
  right bound computed by gt_bcktab_calcrightbounds() for the expanded code
  is stored as the end of the corresponding subbucket. For the last second
  character of each first character, the per-character count obtained from
  leftcontextofspecialchardist() is subtracted from the subbucket end, and
  the unmodified right bound becomes the end of the superbucket.
*/
static void fillanysubbuckets(GtBucketspec2 *bucketspec2,
                              const GtBcktab *bcktab)
{
  const GtCodetype maxcode = gt_bcktab_numofallcodes(bcktab) - 1;
  GtCodetype twocharcode;
  unsigned int firstchar = 0; /* equals twocharcode / numofchars */
  GtUword *specialchardist;

  bucketspec2->expandfactor
    = (GtCodetype) gt_power_for_small_exponents(bucketspec2->numofchars,
                                                bucketspec2->prefixlength-2);
  bucketspec2->expandfillsum = gt_bcktab_filltable(bcktab,2U);
#ifdef SHOWBUCKETSPEC2
  showexpandcode(bucketspec2,bucketspec2->prefixlength);
#endif
  specialchardist = leftcontextofspecialchardist(bucketspec2->numofchars,
                                                 bucketspec2->encseq,
                                                 bucketspec2->readmode);
  twocharcode = 0;
  while (twocharcode < (GtCodetype) bucketspec2->numofcharssquared)
  {
    const GtCodetype expandedcode = expandtwocharcode(twocharcode,bucketspec2);
    GtUword rightbound;
    unsigned int nextchar;

    gt_assert(expandedcode / bucketspec2->expandfactor == twocharcode);
    rightbound = gt_bcktab_calcrightbounds(bcktab,
                                           expandedcode,
                                           maxcode,
                                           bucketspec2->partwidth);
    /* nextchar is the second character of the following code; it wraps
       to 0 exactly when twocharcode is the last code for firstchar */
    nextchar = (unsigned int) ((twocharcode+1) % bucketspec2->numofchars);
    gt_assert((GtCodetype) firstchar
              == twocharcode / bucketspec2->numofchars);
    if (nextchar != 0)
    {
      /* ordinary subbucket: second character is nextchar-1 */
      gt_assert((GtCodetype) (nextchar-1)
                == twocharcode % bucketspec2->numofchars);
      bucketspec2->subbuckettab[firstchar][nextchar-1].bucketend
        = rightbound;
    } else
    {
      /* last subbucket of firstchar: also closes the superbucket */
      gt_assert(rightbound >= specialchardist[firstchar]);
      gt_assert((GtCodetype) (bucketspec2->numofchars-1) ==
                twocharcode % bucketspec2->numofchars);
      bucketspec2->subbuckettab[firstchar]
                               [bucketspec2->numofchars-1].bucketend
        = rightbound - specialchardist[firstchar];
      bucketspec2->superbuckettab[firstchar].bucketend = rightbound;
      firstchar++;
    }
    twocharcode++;
  }
  gt_free(specialchardist);
}
Exemplo n.º 3
0
/*
  Initialise the derived size and mask fields of the FM-index fm from the
  given basic parameters.

  fm               index structure whose fields are written
  specialcharinfo  passed through to determinefmindexsize()
  bwtlength        stored as fm->bwtlength; basis of the block counts
  log2bsize        base-2 logarithm of the block size
  log2markdist     base-2 logarithm of the marking distance
  numofchars       fm->mapsize is set to numofchars+1
  suffixlength     if positive, fm->numofcodes becomes
                   (mapsize-1)^suffixlength, otherwise 0
  storeindexpos    passed through to determinefmindexsize()

  fm->mappedptr is reset to NULL and fm->sizeofindex is set last, from
  determinefmindexsize(), after all other fields have been filled in.
*/
void gt_computefmkeyvalues (Fmindex *fm,
                            const GtSpecialcharinfo *specialcharinfo,
                            GtUword bwtlength,
                            unsigned int log2bsize,
                            unsigned int log2markdist,
                            unsigned int numofchars,
                            unsigned int suffixlength,
                            bool storeindexpos)
{
  /* plain copies of the input parameters */
  fm->mappedptr = NULL;
  fm->log2bsize = log2bsize;
  fm->log2markdist = log2markdist;
  fm->bwtlength = bwtlength;

  /* block and superblock sizes; the superblock exponent is twice the
     block exponent */
  fm->log2superbsize = GT_MULT2 (fm->log2bsize);
  fm->bsize = (unsigned int) GT_POW2 (fm->log2bsize);
  fm->bsizehalve = GT_DIV2(fm->bsize);
  fm->superbsize = (unsigned int) GT_POW2 (fm->log2superbsize);

  /* number of blocks and superblocks derived from the BWT length */
  fm->nofblocks = (GtUword) (fm->bwtlength / fm->bsize) + 1;
  fm->nofsuperblocks = (GtUword) (fm->bwtlength / fm->superbsize) + 2;

  /* marking distance and the bit masks derived from the sizes */
  fm->markdist = (GtUword) GT_POW2 (fm->log2markdist);
  fm->markdistminus1 = (GtUword) (fm->markdist - 1);
  fm->negatebsizeones = ~ (GtUword) (fm->bsize - 1);
  fm->negatesuperbsizeones = ~ (GtUword) (fm->superbsize - 1);
  fm->log2superbsizeminuslog2bsize = fm->log2superbsize - fm->log2bsize;

  /* alphabet dependent values */
  fm->mapsize = numofchars+1;
  fm->suffixlength = suffixlength;
  fm->numofcodes = (fm->suffixlength > 0)
                     ? gt_power_for_small_exponents(fm->mapsize-1,
                                                    fm->suffixlength)
                     : 0;

  /* must come last: reads the fields computed above */
  fm->sizeofindex = determinefmindexsize (fm,
                                          specialcharinfo,
                                          suffixlength,
                                          storeindexpos);
}
/*
  Runner of the kmer database tool.

  Loads the encoded sequence named by argv[parsed_args], walks over it in
  randomly sized intervals and, depending on the options in tool_arguments,
  - adds the intervals to a GtKmerDatabase (db) and, unless merge_only,
    use_hash or bench is set, adds every kmer individually to a second
    database (compare_db) that db is compared against afterwards, or
  - with use_hash, stores the kmer positions in a hashmap instead.
  With bench set, consistency checks are skipped, progress is timed and a
  fixed number of random lookups is performed at the end.

  argc            unused
  argv            command line; argv[parsed_args] is the index name
  parsed_args     index of the first non-option argument
  tool_arguments  pointer to the GtKmerDatabaseArguments of this run
  err             receives the error message on failure

  Returns 0 on success and a non-zero value if opening the output file,
  loading the sequence, the length check, a consistency check or the
  database comparison failed.
*/
static int gt_kmer_database_runner(GT_UNUSED int argc, const char **argv,
                                   int parsed_args, void *tool_arguments,
                                   GtError *err)
{
  GtKmerDatabaseArguments *arguments = tool_arguments;
  int had_err = 0;
  /* es and es_length are initialised because the cleanup code at the end
     is reached even if an error occurs before the sequence is loaded */
  GtEncseq       *es = NULL;
  GtUword        es_length = 0,
                 nu_kmer_codes = 0;
  GtKmerDatabase *compare_db = NULL,
                 *db = NULL;
  GtLogger *logger;
  FILE *fp = NULL;
  GtHashmap *kmer_hash = NULL;
  GtTimer *timer = NULL;

  gt_error_check(err);
  gt_assert(arguments);

  if (arguments->use_hash)
    kmer_hash = gt_hashmap_new(GT_HASH_DIRECT, NULL,
                               (GtFree) gt_kmer_database_delete_hash_value);
  if (arguments->bench)
    timer = gt_timer_new_with_progress_description("loading encoded sequence");

  logger = gt_logger_new(arguments->verbose, GT_LOGGER_DEFLT_PREFIX, stderr);

  if (arguments->verbose && gt_str_length(arguments->print_filename) > 0UL) {
    fp = gt_fa_fopen(gt_str_get(arguments->print_filename), "w", err);
    if (fp == NULL) {
      /* the file could not be opened; gt_fa_fopen() is expected to have
         set err. Keep the logger on stderr and abort the run. */
      had_err = -1;
    }
    else {
      gt_logger_set_target(logger, fp);
    }
  }

  if (!had_err) {
    GtEncseqLoader *es_l;
    if (arguments->bench)
      gt_timer_start(timer);
    es_l = gt_encseq_loader_new();
    es = gt_encseq_loader_load(es_l, argv[parsed_args], err);
    if (arguments->bench)
      gt_timer_show_progress(timer, "saving kmers (+iterating over file)",
                             stdout);
    if (es == NULL) {
      had_err = -1;
    }
    gt_encseq_loader_delete(es_l);
  }
  if (!had_err) {
    es_length = gt_encseq_total_length(es);
    if (es_length < (GtUword) arguments->kmersize) {
      gt_error_set(err, "Input is too short for used kmersize. File length: "
                   GT_WU " kmersize: %u", es_length, arguments->kmersize);
      had_err = -1;
    }
  }
  if (!had_err) {
    GtAlphabet *alphabet;
    alphabet = gt_encseq_alphabet(es);
    /* the number of possible kmer codes is only needed for the random
       lookups of the benchmark */
    if (arguments->bench) {
      nu_kmer_codes = gt_power_for_small_exponents(
                                            gt_alphabet_num_of_chars(alphabet),
                                            arguments->kmersize);
    }
    if (!arguments->merge_only && !arguments->use_hash && !arguments->bench) {
      compare_db = gt_kmer_database_new(gt_alphabet_num_of_chars(alphabet),
                                arguments->kmersize, arguments->sb_size, es);
    }
    if (!arguments->use_hash) {
      db = gt_kmer_database_new(gt_alphabet_num_of_chars(alphabet),
                                arguments->kmersize,
                                arguments->sb_size, es);
      if (arguments->cutoff) {
        if (arguments->mean_cutoff)
          gt_kmer_database_use_mean_cutoff(db, (GtUword) 2,
                                           arguments->cutoff_value);
        else
          gt_kmer_database_set_cutoff(db, arguments->cutoff_value);
        if (!arguments->prune)
          gt_kmer_database_set_prune(db);
      }
    }
  }

  if (!had_err) {
    GtUword startpos = 0,
            endpos;
    GtKmercodeiterator *iter;
    const GtKmercode *kmercode = NULL;
    iter = gt_kmercodeiterator_encseq_new(es, GT_READMODE_FORWARD,
                                          arguments->kmersize, 0);
    /* process the sequence in consecutive intervals [startpos, endpos] of
       random length */
    while (!had_err && startpos < es_length - (arguments->kmersize - 1)) {
      GtUword startpos_add_kmer = startpos;
      if (arguments->merge_only) {
        endpos = startpos + (arguments->kmersize - 1) +
                 (gt_rand_max((arguments->sb_size - 1) * 2));
        if (endpos > es_length)
          endpos = es_length;
      }
      else {
        endpos = startpos + (arguments->kmersize - 1) +
                 (gt_rand_max(arguments->sb_size - 1));
      }
      gt_kmercodeiterator_reset(iter, GT_READMODE_FORWARD, startpos);
      /* add the kmers of the interval one by one to the reference
         structure (compare_db or hashmap) */
      while ((kmercode = gt_kmercodeiterator_encseq_next(iter)) != NULL &&
             startpos_add_kmer <= endpos - (arguments->kmersize - 1)) {
        if (!arguments->merge_only && !arguments->use_hash &&
            !kmercode->definedspecialposition && !arguments->bench) {
          gt_kmer_database_add_kmer(compare_db, kmercode->code,
                                    startpos_add_kmer);
        }
        if (arguments->use_hash && !kmercode->definedspecialposition) {
          gt_kmer_database_add_to_hash(kmer_hash, kmercode->code,
                                       startpos_add_kmer);
        }
        startpos_add_kmer++;
      }
      if (!arguments->use_hash) {
        gt_kmer_database_add_interval(db, startpos, endpos);
        gt_kmer_database_print_buffer(db, logger);
        if (!arguments->bench)
          had_err = gt_kmer_database_check_consistency(db, err);
      }
      startpos = endpos + 1;
    }
    if (!arguments->use_hash) {
      gt_kmer_database_flush(db);
      gt_kmer_database_print_buffer(db, logger);
      if (!had_err && !arguments->bench)
        had_err = gt_kmer_database_check_consistency(db, err);
      if (!arguments->merge_only && !had_err && !arguments->bench)
        had_err = gt_kmer_database_check_consistency(compare_db, err);
      if (!arguments->merge_only && !arguments->bench)
        gt_kmer_database_print(compare_db, logger, true);
      if (!arguments->merge_only && !had_err && !arguments->bench)
        had_err = gt_kmer_database_compare(compare_db, db, err);
      gt_kmer_database_print(db, logger, true);
    }
    gt_kmercodeiterator_delete(iter);
  }

  if (arguments->bench) {
    /* the random lookups need a fully built db/hashmap and a valid
       nu_kmer_codes, so they are skipped after an error; the timer is
       released in either case */
    if (!had_err) {
      GtKmerStartpos pos;
      GtArrayGtUword *pos_hash;
      GtUword rand_access = (GtUword) 50000000,
              rand_code,
              i,
              sum = 0;
      gt_timer_show_progress(timer, "random access", stdout);
      for (i = 0; i < rand_access; i++) {
        rand_code = gt_rand_max(nu_kmer_codes - 1);
        if (arguments->use_hash) {
          pos_hash = gt_hashmap_get(kmer_hash, (const void *) rand_code);
          if (pos_hash != NULL)
            sum += pos_hash->spaceGtUword[pos_hash->nextfreeGtUword - 1];
        }
        else {
          pos = gt_kmer_database_get_startpos(db, rand_code);
          if (pos.no_positions > 0)
            sum += pos.startpos[pos.no_positions - 1];
        }
      }
      /* printing the sum keeps the lookups from being optimised away */
      printf("sum: " GT_WU "\n", sum);

      gt_timer_show_progress(timer, "", stdout);
      gt_timer_stop(timer);
    }
    gt_timer_delete(timer);
  }
  if (arguments->use_hash)
    gt_hashmap_delete(kmer_hash);
  gt_encseq_delete(es);
  if (!arguments->use_hash)
    gt_kmer_database_delete(db);
  if (!arguments->merge_only && !arguments->bench)
    gt_kmer_database_delete(compare_db);
  gt_logger_delete(logger);
  gt_fa_fclose(fp);

  return had_err;
}