/* Writes a table of powers to stdout, one "pow(base,exponent)=value" line
   per entry, using gt_power_for_small_exponents() to compute each value.
   The bases 2, 4, 8 and 3 are covered, each with its own exponent range
   starting at 1. */
void gt_out_power_for_small_exponents(void)
{
  unsigned int e;

  /* base 2, exponents 1..63 */
  e = 1U;
  while (e < 64U)
  {
    printf("pow(2UL,%u)=%lu\n",e, gt_power_for_small_exponents(2U,e));
    e++;
  }

  /* base 4, exponents 1..31 */
  e = 1U;
  while (e < 32U)
  {
    printf("pow(4UL,%u)=%lu\n",e, gt_power_for_small_exponents(4U,e));
    e++;
  }

  /* base 8, exponents 1..15 */
  e = 1U;
  while (e < 16U)
  {
    printf("pow(8UL,%u)=%lu\n",e, gt_power_for_small_exponents(8U,e));
    e++;
  }

  /* base 3, exponents 1..31 */
  e = 1U;
  while (e < 32U)
  {
    printf("pow(3UL,%u)=%lu\n",e, gt_power_for_small_exponents(3U,e));
    e++;
  }
}
/* Sets bucketspec2->expandfactor and bucketspec2->expandfillsum, then walks
   over all two-character codes and stores, for each of them, the right
   bound reported by bcktab as the bucketend of the matching entry in
   bucketspec2->subbuckettab. Whenever the second character is the last
   one of the alphabet, the bucketend of the enclosing superbucket is
   stored as well and the subbucket end is reduced by the corresponding
   entry of the table returned by leftcontextofspecialchardist(). */
static void fillanysubbuckets(GtBucketspec2 *bucketspec2,
                              const GtBcktab *bcktab)
{
  const GtCodetype lastcode = gt_bcktab_numofallcodes(bcktab) - 1;
  GtCodetype twocharcode;
  unsigned int firstchar = 0;
  GtUword *specialdist;

  bucketspec2->expandfactor
    = (GtCodetype) gt_power_for_small_exponents(bucketspec2->numofchars,
                                                bucketspec2->prefixlength-2);
  bucketspec2->expandfillsum = gt_bcktab_filltable(bcktab,2U);
#ifdef SHOWBUCKETSPEC2
  showexpandcode(bucketspec2,bucketspec2->prefixlength);
#endif
  specialdist = leftcontextofspecialchardist(bucketspec2->numofchars,
                                             bucketspec2->encseq,
                                             bucketspec2->readmode);
  for (twocharcode = 0;
       twocharcode < (GtCodetype) bucketspec2->numofcharssquared;
       twocharcode++)
  {
    const GtCodetype expanded = expandtwocharcode(twocharcode,bucketspec2);
    GtUword bound;
    unsigned int nextchar;

    gt_assert(expanded / bucketspec2->expandfactor == twocharcode);
    bound = gt_bcktab_calcrightbounds(bcktab,
                                      expanded,
                                      lastcode,
                                      bucketspec2->partwidth);
    /* nextchar is the second character of the following code; it wraps to
       0 exactly when the current second character is the last one */
    nextchar = (unsigned int) ((twocharcode+1) % bucketspec2->numofchars);
    gt_assert((GtCodetype) firstchar ==
              twocharcode / bucketspec2->numofchars);
    if (nextchar != 0)
    {
      /* ordinary subbucket inside the current superbucket */
      gt_assert((GtCodetype) (nextchar-1) ==
                twocharcode % bucketspec2->numofchars);
      bucketspec2->subbuckettab[firstchar][nextchar-1].bucketend = bound;
      continue;
    }
    /* last subbucket of the current superbucket */
    gt_assert(bound >= specialdist[firstchar]);
    gt_assert((GtCodetype) (bucketspec2->numofchars-1) ==
              twocharcode % bucketspec2->numofchars);
    bucketspec2->subbuckettab[firstchar]
                             [bucketspec2->numofchars-1].bucketend
      = bound - specialdist[firstchar];
    bucketspec2->superbuckettab[firstchar].bucketend = bound;
    firstchar++;
  }
  gt_free(specialdist);
}
/* Initializes the derived key values of the FM-index <fm> from the given
   basic parameters: block and superblock sizes (and their masks) follow
   from <log2bsize>, the marking distance from <log2markdist>, the map size
   from <numofchars>, and the number of codes from <suffixlength>. The
   mapped pointer is reset to NULL. Finally fm->sizeofindex is set to the
   value delivered by determinefmindexsize(). */
void gt_computefmkeyvalues (Fmindex *fm,
                            const GtSpecialcharinfo *specialcharinfo,
                            GtUword bwtlength,
                            unsigned int log2bsize,
                            unsigned int log2markdist,
                            unsigned int numofchars,
                            unsigned int suffixlength,
                            bool storeindexpos)
{
  /* values copied directly from the arguments */
  fm->mappedptr = NULL;
  fm->bwtlength = bwtlength;
  fm->log2bsize = log2bsize;
  fm->log2markdist = log2markdist;
  fm->suffixlength = suffixlength;
  fm->mapsize = numofchars+1;

  /* block geometry; the superblock exponent is twice the block exponent */
  fm->log2superbsize = GT_MULT2 (fm->log2bsize);
  fm->log2superbsizeminuslog2bsize = fm->log2superbsize - fm->log2bsize;
  fm->bsize = (unsigned int) GT_POW2 (fm->log2bsize);
  fm->superbsize = (unsigned int) GT_POW2 (fm->log2superbsize);
  fm->bsizehalve = GT_DIV2(fm->bsize);
  fm->nofblocks = (GtUword) (fm->bwtlength / fm->bsize) + 1;
  fm->nofsuperblocks = (GtUword) (fm->bwtlength / fm->superbsize) + 2;

  /* bit masks clearing the offset inside a block resp. superblock */
  fm->negatebsizeones = ~ (GtUword) (fm->bsize - 1);
  fm->negatesuperbsizeones = ~ (GtUword) (fm->superbsize - 1);

  /* marking distance */
  fm->markdist = (GtUword) GT_POW2 (fm->log2markdist);
  fm->markdistminus1 = (GtUword) (fm->markdist - 1);

  /* no codes are counted when the suffix length is 0 */
  fm->numofcodes = (fm->suffixlength > 0)
                     ? gt_power_for_small_exponents(fm->mapsize-1,
                                                    fm->suffixlength)
                     : 0;

  /* must come last: receives the completely initialized fm */
  fm->sizeofindex = determinefmindexsize (fm,
                                          specialcharinfo,
                                          suffixlength,
                                          storeindexpos);
}
/* Tool runner for the kmer database tool.

   Loads the encoded sequence named by argv[parsed_args], iterates over it
   in randomly sized intervals and feeds the kmers either into a hash map
   (arguments->use_hash) or into a GtKmerDatabase. Unless running in
   merge-only or benchmark mode, a second database is filled kmer by kmer
   and compared against the interval-filled one. In benchmark mode a fixed
   number of random lookups is performed afterwards and timed.

   <tool_arguments> must point to a GtKmerDatabaseArguments object.
   Returns 0 on success and a non-zero value if an error occurred; the
   error message is expected to be stored in <err> by the failing step.

   The benchmark section is skipped if an earlier step failed, and a
   failure to open the print file is treated as an error. */
static int gt_kmer_database_runner(GT_UNUSED int argc, const char **argv,
                                   int parsed_args, void *tool_arguments,
                                   GtError *err)
{
  GtKmerDatabaseArguments *arguments = tool_arguments;
  int had_err = 0;
  /* initialized so that the cleanup code at the end is safe on every
     error path */
  GtEncseq *es = NULL;
  GtUword es_length = 0,
          nu_kmer_codes = 0;
  GtKmerDatabase *compare_db = NULL,
                 *db = NULL;
  GtLogger *logger;
  FILE *fp = NULL;
  GtHashmap *kmer_hash = NULL;
  GtTimer *timer = NULL;

  gt_error_check(err);
  gt_assert(arguments);

  if (arguments->use_hash)
    kmer_hash = gt_hashmap_new(GT_HASH_DIRECT, NULL,
                               (GtFree) gt_kmer_database_delete_hash_value);
  if (arguments->bench)
    timer = gt_timer_new_with_progress_description("loading encoded sequence");

  logger = gt_logger_new(arguments->verbose, GT_LOGGER_DEFLT_PREFIX, stderr);

  if (arguments->verbose && gt_str_length(arguments->print_filename) > 0UL) {
    fp = gt_fa_fopen(gt_str_get(arguments->print_filename), "w", err);
    if (fp == NULL) {
      /* do not redirect the logger to a NULL stream; gt_fa_fopen() is
         presumed to have set err */
      had_err = -1;
    }
    else {
      gt_logger_set_target(logger, fp);
    }
  }

  /* load the encoded sequence */
  if (!had_err) {
    GtEncseqLoader *es_l;
    if (arguments->bench)
      gt_timer_start(timer);
    es_l = gt_encseq_loader_new();
    es = gt_encseq_loader_load(es_l, argv[parsed_args], err);
    if (arguments->bench)
      gt_timer_show_progress(timer, "saving kmers (+iterating over file)",
                             stdout);
    if (es == NULL) {
      had_err = -1;
    }
    gt_encseq_loader_delete(es_l);
  }

  /* the sequence must contain at least one complete kmer */
  if (!had_err) {
    es_length = gt_encseq_total_length(es);
    if (es_length < (GtUword) arguments->kmersize) {
      gt_error_set(err, "Input is too short for used kmersize. File length: "
                        GT_WU " kmersize: %u", es_length, arguments->kmersize);
      had_err = -1;
    }
  }

  /* set up the database(s) */
  if (!had_err) {
    GtAlphabet *alphabet;
    alphabet = gt_encseq_alphabet(es);
    if (arguments->bench)
      nu_kmer_codes = gt_power_for_small_exponents(
                                            gt_alphabet_num_of_chars(alphabet),
                                            arguments->kmersize);
    if (!arguments->merge_only && !arguments->use_hash && !arguments->bench) {
      compare_db = gt_kmer_database_new(gt_alphabet_num_of_chars(alphabet),
                                        arguments->kmersize,
                                        arguments->sb_size, es);
    }
    if (!arguments->use_hash) {
      db = gt_kmer_database_new(gt_alphabet_num_of_chars(alphabet),
                                arguments->kmersize,
                                arguments->sb_size, es);
      if (arguments->cutoff) {
        if (arguments->mean_cutoff)
          gt_kmer_database_use_mean_cutoff(db, (GtUword) 2,
                                           arguments->cutoff_value);
        else
          gt_kmer_database_set_cutoff(db, arguments->cutoff_value);
        /* NOTE(review): pruning is switched on when arguments->prune is
           false; presumably the flag stores a "disable" option - confirm
           against the option parser */
        if (!arguments->prune)
          gt_kmer_database_set_prune(db);
      }
    }
  }

  /* fill the database(s) or the hash interval by interval */
  if (!had_err) {
    GtUword startpos = 0,
            endpos;
    GtKmercodeiterator *iter;
    const GtKmercode *kmercode = NULL;
    iter = gt_kmercodeiterator_encseq_new(es, GT_READMODE_FORWARD,
                                          arguments->kmersize, 0);
    /* no underflow here: es_length >= kmersize was checked above */
    while (!had_err && startpos < es_length - (arguments->kmersize - 1)) {
      GtUword startpos_add_kmer = startpos;
      if (arguments->merge_only) {
        endpos = startpos + (arguments->kmersize - 1) +
                 (gt_rand_max((arguments->sb_size - 1) * 2));
        if (endpos > es_length)
          endpos = es_length;
      }
      else {
        endpos = startpos + (arguments->kmersize - 1) +
                 (gt_rand_max(arguments->sb_size - 1));
      }
      gt_kmercodeiterator_reset(iter, GT_READMODE_FORWARD, startpos);
      while ((kmercode = gt_kmercodeiterator_encseq_next(iter)) != NULL &&
             startpos_add_kmer <= endpos - (arguments->kmersize - 1)) {
        if (!arguments->merge_only && !arguments->use_hash &&
            !kmercode->definedspecialposition && !arguments->bench) {
          gt_kmer_database_add_kmer(compare_db, kmercode->code,
                                    startpos_add_kmer);
        }
        if (arguments->use_hash && !kmercode->definedspecialposition) {
          gt_kmer_database_add_to_hash(kmer_hash, kmercode->code,
                                       startpos_add_kmer);
        }
        startpos_add_kmer++;
      }
      if (!arguments->use_hash) {
        gt_kmer_database_add_interval(db, startpos, endpos);
        gt_kmer_database_print_buffer(db, logger);
        if (!arguments->bench)
          had_err = gt_kmer_database_check_consistency(db, err);
      }
      startpos = endpos + 1;
    }
    if (!arguments->use_hash) {
      gt_kmer_database_flush(db);
      gt_kmer_database_print_buffer(db, logger);
      if (!had_err && !arguments->bench)
        had_err = gt_kmer_database_check_consistency(db, err);
      if (!arguments->merge_only && !had_err && !arguments->bench)
        had_err = gt_kmer_database_check_consistency(compare_db, err);
      if (!arguments->merge_only && !arguments->bench)
        gt_kmer_database_print(compare_db, logger, true);
      if (!arguments->merge_only && !had_err && !arguments->bench)
        had_err = gt_kmer_database_compare(compare_db, db, err);
      gt_kmer_database_print(db, logger, true);
    }
    gt_kmercodeiterator_delete(iter);
  }

  if (arguments->bench) {
    /* Only benchmark a successfully built index: after an error db may be
       NULL and nu_kmer_codes may still be 0, which would make
       nu_kmer_codes - 1 wrap around. */
    if (!had_err) {
      GtKmerStartpos pos;
      GtArrayGtUword *pos_hash;
      GtUword rand_access = (GtUword) 50000000,
              rand_code,
              i,
              sum = 0;
      gt_timer_show_progress(timer, "random access", stdout);
      for (i = 0; i < rand_access; i++) {
        rand_code = gt_rand_max(nu_kmer_codes - 1);
        if (arguments->use_hash) {
          pos_hash = gt_hashmap_get(kmer_hash, (const void *) rand_code);
          if (pos_hash != NULL)
            sum += pos_hash->spaceGtUword[pos_hash->nextfreeGtUword - 1];
        }
        else {
          pos = gt_kmer_database_get_startpos(db, rand_code);
          if (pos.no_positions > 0)
            sum += pos.startpos[pos.no_positions - 1];
        }
      }
      /* sum is printed so that the lookups have an observable result */
      printf("sum: " GT_WU "\n", sum);

      gt_timer_show_progress(timer, "", stdout);
      gt_timer_stop(timer);
    }
    /* the timer was allocated whenever bench is set, so always free it */
    gt_timer_delete(timer);
  }

  /* cleanup; es, db and compare_db may be NULL on error paths */
  if (arguments->use_hash)
    gt_hashmap_delete(kmer_hash);
  gt_encseq_delete(es);
  if (!arguments->use_hash)
    gt_kmer_database_delete(db);
  if (!arguments->merge_only && !arguments->bench)
    gt_kmer_database_delete(compare_db);
  gt_logger_delete(logger);
  gt_fa_fclose(fp);

  return had_err;
}