/*
  Unit test for the priority queue. In each of <trials> rounds a queue whose
  capacity is drawn with gt_rand_max(maxsize) is filled up with values drawn
  with gt_rand_max(maximal_value); the elements delivered by
  gt_priority_queue_extract_min() are then compared, one by one, against a
  qsort()-ed copy of the same values.

  Returns <had_err>: 0 if all checks passed, otherwise the value set by
  gt_ensure(). A failed check no longer terminates the process; the test stops
  at the first failure, releases the queue and both buffers, and reports the
  failure to the caller like every other unit test.
*/
int gt_priority_queue_unit_test(GtError *err)
{
  int had_err = 0;
  unsigned long idx,
                maxsize = 10000UL,
                trials = 1000UL,
                *numbers = gt_malloc(sizeof *numbers * maxsize),
                *numbers_copy = gt_malloc(sizeof *numbers_copy * maxsize);
  unsigned long arr[] = {76UL, 132UL, 136UL, 538UL, 545UL, 401UL};

  gt_error_check (err);
  /* smoke test of the sorting front end on a small fixed array */
  gt_priority_sort(arr,(unsigned long) sizeof arr/sizeof arr[0]);
  for (idx = 0; !had_err && idx < trials; idx++)
  {
    unsigned long j,
                  size = gt_rand_max(maxsize),
                  maximal_value = 1 + gt_rand_max(1000UL);
    GtPriorityQueue *pq = gt_priority_queue_new(cmpUlong,size);
    void *elem;

    /* the queue stores pointers into numbers_copy, which is left untouched,
       while numbers is sorted below to serve as the reference order */
    for (j = 0; j < size; j++)
    {
      numbers_copy[j] = numbers[j] = gt_rand_max(maximal_value);
      gt_priority_queue_add(pq, numbers_copy + j);
    }
    gt_ensure(had_err,gt_priority_queue_is_full(pq));
    qsort(numbers,(size_t) size,sizeof *numbers,cmpUlong);
    for (j = 0; !had_err && j < size; j++)
    {
      elem = gt_priority_queue_extract_min(pq);
      if (*((unsigned long *) elem) != numbers[j])
      {
        /* keep the detailed diagnostic, but let gt_ensure() below record the
           failure instead of killing the process */
        fprintf(stderr,"elem=%lu != %lu = numbers[%lu]\n",
                *((unsigned long *) elem),numbers[j],j);
      }
      gt_ensure(had_err,*((unsigned long *) elem) == numbers[j]);
    }
    gt_ensure(had_err,gt_priority_queue_is_empty(pq));
    gt_priority_queue_delete(pq);
  }
  gt_free(numbers);
  gt_free(numbers_copy);
  return had_err;
}
/*
  Lua binding for gt_rand_max(): expects one numeric argument <max> and pushes
  the result of gt_rand_max(max) as a Lua number. Returns 1 (the number of
  results left on the Lua stack).

  The argument is validated before it is converted: converting a negative,
  NaN or too large floating point value to an unsigned integer type is
  undefined behavior in C, so such arguments raise a Lua argument error
  instead.
*/
static int gt_lua_mathsupport_rand_max(lua_State *L)
{
  /* 2^(bit width of GtUword), computed without overflowing GtUword; every
     non-negative value strictly below it truncates to a valid GtUword */
  const lua_Number limit = 2.0 * (lua_Number) ((~(GtUword) 0 >> 1) + 1);
  lua_Number arg = luaL_checknumber(L, 1);

  /* NaN fails the first comparison and is rejected as well */
  luaL_argcheck(L, arg >= 0 && arg < limit, 1,
                "must be a non-negative number that fits an unsigned integer");
  lua_pushnumber(L, gt_rand_max((GtUword) arg));
  return 1;
}
/*
  Returns a character obtained by adding gt_rand_max(25) to the code 97
  (ASCII 'a'); the result is asserted to lie in 'a'..'z'.
*/
char gt_rand_char(void)
{
  const int first_letter_code = 97; /* 'a' in ASCII */
  int shift;
  char letter;

  shift = gt_rand_max(25);
  letter = first_letter_code + shift;
  gt_assert('a' <= letter && letter <= 'z');
  return letter;
}
/*
  Fills seqnums[0..nofseqs-1] with the numbers 0..nofseqs-1 in shuffled order.

  The array is built incrementally: for each new number <i> a slot <j> is
  drawn with gt_rand_max(i), the previous occupant of slot <j> is moved to
  slot <i>, and <i> is stored in slot <j>.
  NOTE(review): this yields a permutation provided gt_rand_max(i) never
  returns a value larger than <i> -- confirm against its definition.

  <seqnums> must not be NULL. For nofseqs == 0 nothing is written, so a
  zero-length buffer is never touched.
*/
static void gt_seqorder_get_shuffled_seqnums(unsigned long nofseqs,
                                             unsigned long *seqnums)
{
  unsigned long i, j;

  gt_assert(seqnums != NULL);
  if (nofseqs == 0)
    return; /* empty input: seqnums[0] does not exist */
  seqnums[0] = 0;
  for (i = 1UL; i < nofseqs; i++)
  {
    j = gt_rand_max(i);
    seqnums[i] = seqnums[j];
    seqnums[j] = i;
  }
}
/*
  Fills seqnums[0..nofseqs-1] with the numbers 0..nofseqs-1 in shuffled order.

  The array is built incrementally: for each new number <i> a slot <j> is
  drawn with gt_rand_max(i), the previous occupant of slot <j> is moved to
  slot <i>, and <i> is stored in slot <j>.
  NOTE(review): this yields a permutation provided gt_rand_max(i) never
  returns a value larger than <i> -- confirm against its definition.

  <seqnums> must not be NULL. For nofseqs == 0 nothing is written, so a
  zero-length buffer is never touched.
*/
static void gt_seqorder_get_shuffled_seqnums(GtUword nofseqs,
                                             GtUword *seqnums)
{
  GtUword i, j;

  gt_assert(seqnums != NULL);
  if (nofseqs == 0)
    return; /* empty input: seqnums[0] does not exist */
  seqnums[0] = 0;
  for (i = 1UL; i < nofseqs; i++)
  {
    j = gt_rand_max(i);
    seqnums[i] = seqnums[j];
    seqnums[j] = i;
  }
}
/*
  Worker routine (the <data> argument is unused): NUMBER_OF_SYMBOLS times, it
  formats the value of gt_rand_max(MAX_SYMBOL) as a decimal string, passes it
  to gt_symbol(), and asserts that gt_symbol() hands back a string with the
  same content. Always returns NULL.
*/
static void* test_symbol(GT_UNUSED void *data)
{
  GtUword round = 0;
  GtStr *buf = gt_str_new();

  while (round < NUMBER_OF_SYMBOLS) {
    gt_str_reset(buf);
    gt_str_append_ulong(buf, gt_rand_max(MAX_SYMBOL));
    /* first call registers/looks up the symbol even if assertions are
       compiled out */
    gt_symbol(gt_str_get(buf));
    gt_assert(strcmp(gt_symbol(gt_str_get(buf)), gt_str_get(buf)) == 0);
    round++;
  }
  gt_str_delete(buf);
  return NULL;
}
/*
  Decodes <amount> reads from <hcrd>, each selected with
  gt_rand_max(number of reads - 1), and logs the description, sequence and
  quality string of every decoded read.

  If <timer> is NULL a new timer is created and started, otherwise progress is
  reported on the given one. The timer is deleted before returning unless
  gt_showtime_enabled() is true.

  Returns 0 on success or the first non-zero value delivered by
  gt_hcr_decoder_decode(); no further reads are decoded after a failure.
*/
static int gt_compreads_decompress_benchmark(GtHcrDecoder *hcrd,
                                             unsigned long amount,
                                             GtTimer *timer,
                                             GtError *err)
{
  char qual[BUFSIZ] = {0},
       seq[BUFSIZ] = {0};
  int had_err = 0;
  unsigned long read_idx,
                last_read = gt_hcr_decoder_num_of_reads(hcrd) - 1,
                done;
  GtStr *comment = gt_str_new_cstr("extracting ");
  GtStr *desc = gt_str_new();

  /* assemble "extracting <amount> reads of <total>!" */
  gt_str_append_ulong(comment, amount);
  gt_str_append_cstr(comment, " reads of ");
  gt_str_append_ulong(comment, last_read + 1);
  gt_str_append_cstr(comment, "!");

  if (timer != NULL) {
    gt_timer_show_progress(timer, "extract random reads", stdout);
  }
  else {
    timer = gt_timer_new_with_progress_description("extract random reads");
    gt_timer_start(timer);
  }

  gt_log_log("%s",gt_str_get(comment));
  for (done = 0; done < amount && !had_err; done++) {
    read_idx = gt_rand_max(last_read);
    gt_log_log("get read: %lu", read_idx);
    had_err = gt_hcr_decoder_decode(hcrd, read_idx, seq, qual, desc, err);
    gt_log_log("%s",gt_str_get(desc));
    gt_log_log("%s",seq);
    gt_log_log("%s",qual);
  }

  gt_str_delete(comment);
  gt_str_delete(desc);
  if (!gt_showtime_enabled()) {
    gt_timer_delete(timer);
  }
  return had_err;
}
/*
  Delivers the next fragment of the current sequence of <shredder>.

  The fragment length is shredder->minlength plus, if minlength and maxlength
  differ, gt_rand_max(maxlength - minlength); it is shortened if it would run
  past the end of the sequence. The length is stored in <*fragment_length> and
  <desc> is overwritten with the description of the current sequence.

  Afterwards the shredder either moves on to the next sequence (if the
  fragment reached the sequence end) or advances its position by the fragment
  length minus the overlap, but by at least one base.

  Returns the string obtained from gt_bioseq_get_sequence_range(), or NULL
  once all sequences have been processed.
*/
static char* generate_fragment(GtShredder *shredder,
                               unsigned long *fragment_length, GtStr *desc)
{
  unsigned long total_len, frag_len;
  char *fragment;

  gt_assert(shredder && fragment_length);
  if (!(shredder->seqnum < gt_bioseq_number_of_sequences(shredder->bioseq)))
    return NULL; /* no sequences left */

  total_len = gt_bioseq_get_sequence_length(shredder->bioseq,
                                            shredder->seqnum);
  if (shredder->maxlength == shredder->minlength)
    frag_len = 0 + shredder->minlength;
  else
    frag_len = gt_rand_max(shredder->maxlength - shredder->minlength)
               + shredder->minlength;
  gt_assert(frag_len >= shredder->minlength);

  /* clip at the sequence end */
  if (shredder->pos + frag_len > total_len)
    frag_len = total_len - shredder->pos;
  *fragment_length = frag_len;

  gt_str_reset(desc);
  gt_str_append_cstr(desc, gt_bioseq_get_description(shredder->bioseq,
                                                     shredder->seqnum));
  gt_assert(shredder->pos + frag_len <= total_len);
  fragment = gt_bioseq_get_sequence_range(shredder->bioseq, shredder->seqnum,
                                          shredder->pos,
                                          shredder->pos + frag_len -1);

  if (shredder->pos + frag_len == total_len) {
    /* last fragment of this sequence: continue with the next one */
    shredder->seqnum++;
    shredder->pos = 0;
  }
  else if (frag_len > shredder->overlap) {
    shredder->pos += frag_len - shredder->overlap;
  }
  else {
    shredder->pos++; /* go at least one base further each step */
  }
  return fragment;
}
/*
  Unit test for the 16 bit integer set. A strictly increasing array of
  gt_rand_max(1 << 10) + 1 values is generated, stored in a GtIntset and the
  set is then queried for every stored value, its neighbours and all values
  in the gaps between stored values.

  The set is only built if the size reported by gt_intset_16_size_of_rep()
  is below UINT_MAX. Returns 0 on success, a non-zero value if a check
  failed.
*/
int gt_intset_16_unit_test(GtError *err)
{
  int had_err = 0;
  GtIntset *is;
  GtUword num_of_elems = gt_rand_max(((GtUword) 1) << 10) + 1,
          *arr = gt_malloc(sizeof (*arr) * num_of_elems),
          /* NOTE(review): '/' binds tighter than '<<', so this is evaluated
             as num_of_elems << (4 / num_of_elems). Left unchanged because
             the intended formula cannot be determined from here. */
          stepsize = GT_DIV2(num_of_elems <<4 / num_of_elems),
          idx;
  size_t is_size;

  gt_error_check(err);

  /* every element exceeds its predecessor by at least 1, arr[0] >= 1 */
  arr[0] = gt_rand_max(stepsize) + 1;
  for (idx = (GtUword) 1; idx < num_of_elems; ++idx) {
    arr[idx] = arr[idx - 1] + gt_rand_max(stepsize) + 1;
  }

  is_size = gt_intset_16_size_of_rep(arr[num_of_elems - 1], num_of_elems);

  if (!had_err) {
    if (is_size < (size_t) UINT_MAX) {
      is = gt_intset_16_new(arr[num_of_elems - 1], num_of_elems);
      for (idx = 0; idx < num_of_elems; idx++) {
        gt_intset_16_add(is, arr[idx]);
        gt_ensure(idx + 1 == gt_intset_16_size(is));
        if (idx < num_of_elems - 1) {
          /* nothing >= arr[idx] + 1 has been added yet */
          gt_ensure(gt_intset_16_get_idx_smallest_geq(is, arr[idx] + 1) ==
                    num_of_elems);
        }
      }

      gt_ensure(gt_intset_16_elems_is_valid(is));
      gt_ensure(gt_intset_16_secstart_is_valid(is));

      for (idx = 0; !had_err && idx < num_of_elems; idx++) {
        /* arr[idx] - 1 is not a member if there is no predecessor or the
           predecessor differs from it; the first element has no
           predecessor, so arr[idx - 1] must not be read for idx == 0 */
        if (arr[idx] != 0 &&
            (idx == 0 || arr[idx - 1] != (arr[idx] - 1))) {
          gt_ensure(
            gt_intset_16_get_idx_smallest_geq_test(is, arr[idx] - 1) == idx);
          gt_ensure(
            gt_intset_16_get_idx_smallest_geq(is, arr[idx] - 1) == idx);
        }
        gt_ensure(gt_intset_16_get_test(is, idx) == arr[idx]);
        gt_ensure(gt_intset_16_get(is, idx) == arr[idx]);
        gt_ensure(
          gt_intset_16_get_idx_smallest_geq_test(is, arr[idx] + 1) == idx + 1);
        gt_ensure(
          gt_intset_16_get_idx_smallest_geq(is, arr[idx] + 1) == idx + 1);
      }

      /* values below the first element */
      if (!had_err)
        had_err = gt_intset_unit_test_notinset(is, 0, arr[0] - 1, err);
      if (!had_err)
        had_err = gt_intset_unit_test_check_seqnum(is, 0, arr[0] - 1, 0, err);

      /* values strictly between consecutive elements */
      for (idx = (GtUword) 1; !had_err && idx < num_of_elems; idx++) {
        had_err = gt_intset_unit_test_notinset(is, arr[idx - 1] + 1,
                                               arr[idx] - 1, err);
        if (!had_err)
          had_err = gt_intset_unit_test_check_seqnum(is, arr[idx - 1] + 1,
                                                     arr[idx] - 1, idx, err);
      }
      gt_intset_delete(is);
    }
  }
  gt_free(arr);
  return had_err;
}
/*
  Unit test for the string matching routines. The brute force matcher serves
  as reference; the BMH, KMP and shift-and matchers must report the same
  first match and the same list of all matches.

  First the empty pattern is matched against a fixed text (no match may be
  reported), then STRING_MATCHING_NUM_OF_TESTS rounds are run on strings and
  patterns filled with gt_rand_char(). Returns 0 on success.
*/
int gt_string_matching_unit_test(GtError *err)
{
  char subject[STRING_MATCHING_MAX_STRING_LENGTH+1],
       pattern[STRING_MATCHING_MAX_PATTERN_LENGTH+1],
       *text = "foo";
  GtArray *naive_hits, *bmh_hits, *kmp_hits, *shift_and_hits;
  unsigned long round, naive_first, bmh_first, kmp_first, shift_and_first;
  int had_err = 0;

  gt_error_check(err);

  naive_hits = gt_array_new(sizeof (unsigned long));
  bmh_hits = gt_array_new(sizeof (unsigned long));
  kmp_hits = gt_array_new(sizeof (unsigned long));
  shift_and_hits = gt_array_new(sizeof (unsigned long));

  /* the empty pattern must not produce any match */
  gt_string_matching_brute_force(text, strlen(text), "", 0, store_match,
                                 naive_hits);
  gt_string_matching_bmh(text, strlen(text), "", 0, store_match, bmh_hits);
  gt_string_matching_kmp(text, strlen(text), "", 0, store_match, kmp_hits);
  gt_string_matching_shift_and(text, strlen(text), "", 0, store_match,
                               shift_and_hits);

  ensure(had_err, !gt_array_size(naive_hits));
  ensure(had_err, !gt_array_size(bmh_hits));
  ensure(had_err, !gt_array_size(kmp_hits));
  ensure(had_err, !gt_array_size(shift_and_hits));

  round = 0;
  while (!had_err && round < STRING_MATCHING_NUM_OF_TESTS) {
    unsigned long pos, subject_len, pattern_len;

    /* draw both lengths first, then fill subject and pattern */
    subject_len = gt_rand_max(STRING_MATCHING_MAX_STRING_LENGTH);
    pattern_len = gt_rand_max(STRING_MATCHING_MAX_PATTERN_LENGTH);
    for (pos = 0; pos < subject_len; pos++) {
      subject[pos] = gt_rand_char();
    }
    subject[subject_len] = '\0';
    for (pos = 0; pos < pattern_len; pos++) {
      pattern[pos] = gt_rand_char();
    }
    pattern[pattern_len] = '\0';

    /* first match of every algorithm */
    naive_first = GT_UNDEF_ULONG;
    bmh_first = GT_UNDEF_ULONG;
    kmp_first = GT_UNDEF_ULONG;
    shift_and_first = GT_UNDEF_ULONG;
    gt_string_matching_brute_force(subject, subject_len, pattern, pattern_len,
                                   store_first_match, &naive_first);
    gt_string_matching_bmh(subject, subject_len, pattern, pattern_len,
                           store_first_match, &bmh_first);
    gt_string_matching_kmp(subject, subject_len, pattern, pattern_len,
                           store_first_match, &kmp_first);
    gt_string_matching_shift_and(subject, subject_len, pattern, pattern_len,
                                 store_first_match, &shift_and_first);

    ensure(had_err, naive_first == bmh_first);
    ensure(had_err, naive_first == kmp_first);
    ensure(had_err, naive_first == shift_and_first);

    /* all matches of every algorithm */
    gt_string_matching_brute_force(subject, subject_len, pattern, pattern_len,
                                   store_match, naive_hits);
    gt_string_matching_bmh(subject, subject_len, pattern, pattern_len,
                           store_match, bmh_hits);
    gt_string_matching_kmp(subject, subject_len, pattern, pattern_len,
                           store_match, kmp_hits);
    gt_string_matching_shift_and(subject, subject_len, pattern, pattern_len,
                                 store_match, shift_and_hits);

    ensure(had_err, gt_array_size(naive_hits) == gt_array_size(bmh_hits));
    ensure(had_err, gt_array_size(naive_hits) == gt_array_size(kmp_hits));
    ensure(had_err,
           gt_array_size(naive_hits) == gt_array_size(shift_and_hits));
    ensure(had_err, !gt_array_cmp(naive_hits, bmh_hits));
    ensure(had_err, !gt_array_cmp(naive_hits, kmp_hits));
    ensure(had_err, !gt_array_cmp(naive_hits, shift_and_hits));

    /* start the next round with empty match lists */
    gt_array_reset(naive_hits);
    gt_array_reset(bmh_hits);
    gt_array_reset(kmp_hits);
    gt_array_reset(shift_and_hits);

    round++;
  }

  gt_array_delete(shift_and_hits);
  gt_array_delete(bmh_hits);
  gt_array_delete(kmp_hits);
  gt_array_delete(naive_hits);

  return had_err;
}
/*
  Unit test for the doubly linked list. Lists with zero, one and two elements
  are checked first, each with the compare function intcompare and without a
  compare function. Afterwards NUM_OF_TESTS rounds with up to MAX_SIZE values
  drawn by gt_rand_max(INT_MAX) are run:
  - with compare function the list must deliver the values in the order of a
    qsort()-ed copy, and gt_dlist_find() must locate every value;
  - without compare function the list must deliver them in insertion order.
  In both cases the first and the last element are removed and the new first
  and last elements are checked. Returns 0 on success.
*/
int gt_dlist_unit_test(GtError *err)
{
  GtDlist *dlist;
  GtDlistelem *node;
  int round, k, count, *value,
      elem_a = 7,
      elem_b = 6,
      values[MAX_SIZE],
      sorted_values[MAX_SIZE],
      had_err = 0;

  gt_error_check(err);

  /* boundary case: empty dlist */
  dlist = gt_dlist_new(intcompare);
  ensure(had_err, !gt_dlist_size(dlist));
  gt_dlist_delete(dlist);

  dlist = gt_dlist_new(NULL);
  ensure(had_err, !gt_dlist_size(dlist));
  gt_dlist_delete(dlist);

  /* boundary case: dlist containing one element */
  dlist = gt_dlist_new(intcompare);
  gt_dlist_add(dlist, &elem_a);
  ensure(had_err, gt_dlist_size(dlist) == 1);
  ensure(had_err,
         elem_a == *(int*) gt_dlistelem_get_data(gt_dlist_first(dlist)));
  gt_dlist_delete(dlist);

  dlist = gt_dlist_new(NULL);
  gt_dlist_add(dlist, &elem_a);
  ensure(had_err, gt_dlist_size(dlist) == 1);
  ensure(had_err,
         elem_a == *(int*) gt_dlistelem_get_data(gt_dlist_first(dlist)));
  gt_dlist_delete(dlist);

  /* boundary case: dlist containing two elements */
  dlist = gt_dlist_new(intcompare);
  gt_dlist_add(dlist, &elem_a);
  gt_dlist_add(dlist, &elem_b);
  ensure(had_err, gt_dlist_size(dlist) == 2);
  /* with compare function the smaller value comes first */
  ensure(had_err,
         elem_b == *(int*) gt_dlistelem_get_data(gt_dlist_first(dlist)));
  gt_dlist_delete(dlist);

  dlist = gt_dlist_new(NULL);
  gt_dlist_add(dlist, &elem_a);
  gt_dlist_add(dlist, &elem_b);
  ensure(had_err, gt_dlist_size(dlist) == 2);
  /* without compare function the first added value comes first */
  ensure(had_err,
         elem_a == *(int*) gt_dlistelem_get_data(gt_dlist_first(dlist)));
  gt_dlist_delete(dlist);

  round = 0;
  while (round < NUM_OF_TESTS && !had_err) {
    /* construct the random elements for the list */
    count = gt_rand_max(MAX_SIZE);
    for (k = 0; k < count; k++) {
      values[k] = gt_rand_max(INT_MAX);
      sorted_values[k] = values[k];
    }

    /* sort the backup elements */
    qsort(sorted_values, count, sizeof (int), intcompare);

    /* test with compare function */
    dlist = gt_dlist_new(intcompare);
    ensure(had_err, !gt_dlist_size(dlist));
    for (k = 0; k < count && !had_err; k++) {
      gt_dlist_add(dlist, values + k);
      ensure(had_err, gt_dlist_size(dlist) == k+1);

      /* walk the whole list once after every insertion */
      node = gt_dlist_first(dlist);
      while (node != NULL) {
        node = gt_dlistelem_next(node);
      }
    }
    k = 0;
    for (node = gt_dlist_first(dlist); node != NULL;
         node = gt_dlistelem_next(node)) {
      value = gt_dlistelem_get_data(node);
      ensure(had_err, *value == sorted_values[k]);
      k++;
    }

    /* test gt_dlist_find() */
    for (k = 0; k < count; k++) {
      node = gt_dlist_find(dlist, sorted_values + k);
      ensure(had_err, node);
      ensure(had_err,
             *(int*) gt_dlistelem_get_data(node) == sorted_values[k]);
    }

    /* remove first element */
    if (gt_dlist_size(dlist)) {
      gt_dlist_remove(dlist, gt_dlist_first(dlist));
      if (gt_dlist_size(dlist)) {
        value = gt_dlistelem_get_data(gt_dlist_first(dlist));
        ensure(had_err, *value == sorted_values[1]);
      }
    }

    /* remove last element */
    if (gt_dlist_size(dlist)) {
      gt_dlist_remove(dlist, gt_dlist_last(dlist));
      if (gt_dlist_size(dlist)) {
        value = gt_dlistelem_get_data(gt_dlist_last(dlist));
        ensure(had_err, *value == sorted_values[count - 2]);
      }
    }

    /* XXX: fix this */
#if 0
    /* remove middle element */
    if (gt_dlist_size(dlist) >= 2) {
      node = gt_dlist_first(dlist);
      for (k = 1; k < gt_dlist_size(dlist) / 2; k++)
        node = gt_dlistelem_next(node);
      gt_dlist_remove(dlist, node);
      node = gt_dlist_first(dlist);
      for (k = 1; k < gt_dlist_size(dlist) / 2 + 1; k++)
        node = gt_dlistelem_next(node);
      value = gt_dlistelem_get_data(gt_dlist_last(dlist));
      ensure(had_err, *value == sorted_values[count / 2 + 1]);
    }
#endif

    gt_dlist_delete(dlist);

    /* test without compare function */
    dlist = gt_dlist_new(NULL);
    ensure(had_err, !gt_dlist_size(dlist));
    for (k = 0; k < count && !had_err; k++) {
      gt_dlist_add(dlist, values + k);
      ensure(had_err, gt_dlist_size(dlist) == k+1);
    }
    k = 0;
    for (node = gt_dlist_first(dlist); node != NULL;
         node = gt_dlistelem_next(node)) {
      value = gt_dlistelem_get_data(node);
      ensure(had_err, *value == values[k]);
      k++;
    }

    /* remove first element */
    if (gt_dlist_size(dlist)) {
      gt_dlist_remove(dlist, gt_dlist_first(dlist));
      if (gt_dlist_size(dlist)) {
        value = gt_dlistelem_get_data(gt_dlist_first(dlist));
        ensure(had_err, *value == values[1]);
      }
    }

    /* remove last element */
    if (gt_dlist_size(dlist)) {
      gt_dlist_remove(dlist, gt_dlist_last(dlist));
      if (gt_dlist_size(dlist)) {
        value = gt_dlistelem_get_data(gt_dlist_last(dlist));
        ensure(had_err, *value == values[count - 2]);
      }
    }

    gt_dlist_delete(dlist);
    round++;
  }

  return had_err;
}
/*
  Unit test for the interval tree, in three parts:
  1. <num_testranges> ranges with starts drawn by gt_rand_max() are inserted;
     <num_samples> queries for a first overlapping interval are verified (a
     hit must overlap the query, a miss is confirmed by a linear scan over
     all ranges).
  2. <num_find_all_samples> queries for all overlapping intervals are
     compared against a reference list built by linear scan.
  3. A second tree is filled with nodes carrying their insertion index as
     data; nodes are removed one at a time in random order, and after each
     removal the tree size and the absence of the removed node are checked.
  Returns 0 on success.
*/
int gt_interval_tree_unit_test(GT_UNUSED GtError *err)
{
  GtIntervalTree *it = NULL;
  GtIntervalTreeNode *first_hit = NULL;
  unsigned long i = 0;
  int had_err = 0,
      num_testranges = 3000,
      num_samples = 300000,
      num_find_all_samples = 10000,
      gt_range_max_basepos = 90000,
      width = 700,
      query_width = 5000;
  GtRange *hit_rng = NULL,
          qrange;
  GtArray *arr = NULL,
          *narr = NULL;

  arr = gt_array_new(sizeof (GtRange*));

  /* generate test ranges */
  for (i = 0; i < num_testranges; i++) {
    unsigned long start;
    GtRange *rng = gt_calloc(1, sizeof (GtRange));
    start = gt_rand_max(gt_range_max_basepos);
    rng->start = start;
    rng->end = start + gt_rand_max(width);
    gt_array_add(arr, rng);
  }

  /* the tree releases the ranges with gt_free_func */
  it = gt_interval_tree_new(gt_free_func);

  /* insert ranges */
  i = 0;
  while (i < num_testranges && !had_err) {
    GtRange *rng = *(GtRange**) gt_array_get(arr, i);
    GtIntervalTreeNode *new_node = gt_interval_tree_node_new(rng, rng->start,
                                                             rng->end);
    gt_interval_tree_insert(it, new_node);
    i++;
  }
  gt_ensure(had_err, gt_interval_tree_size(it) == num_testranges);

  /* perform test queries */
  for (i = 0; i < num_samples && !had_err; i++) {
    unsigned long start = gt_rand_max(gt_range_max_basepos);
    qrange.start = start;
    qrange.end = start + gt_rand_max(width);
    first_hit = gt_interval_tree_find_first_overlapping(it, qrange.start,
                                                        qrange.end);
    if (!first_hit) {
      /* no hit, check whether there really is no overlapping
         interval in tree */
      unsigned long j;
      bool found = false;
      for (j = 0; j < gt_array_size(arr); j++) {
        GtRange *this_rng = *(GtRange**) gt_array_get(arr, j);
        if (gt_range_overlap(this_rng, &qrange)) {
          found = true;
          break;
        }
      }
      gt_ensure(had_err, !found);
    }
    else {
      /* we have a hit, check if really overlapping */
      hit_rng = (GtRange*) gt_interval_tree_node_get_data(first_hit);
      gt_ensure(had_err, gt_range_overlap(&qrange, hit_rng));
    }
  }

  /* test searching for all overlapping intervals */
  for (i = 0; i < num_find_all_samples && !had_err; i++) {
    GtArray *hits;
    unsigned long start = gt_rand_max(gt_range_max_basepos);
    qrange.start = start;
    qrange.end = start + gt_rand_max(query_width);
    hits = gt_array_new(sizeof (GtRange*));
    gt_interval_tree_find_all_overlapping(it, qrange.start, qrange.end, hits);
    if (hits) {
      /* generate reference overlapping interval list by linear search */
      unsigned long j;
      GtArray *ref = gt_array_new(sizeof (GtRange*));
      for (j = 0; j < gt_array_size(arr); j++) {
        GtRange *this_rng = *(GtRange**) gt_array_get(arr, j);
        if (gt_range_overlap(this_rng, &qrange)) {
          gt_array_add(ref, this_rng);
        }
      }
      /* compare reference with interval tree query result */
      gt_array_sort_stable(ref, range_ptr_compare);
      gt_array_sort_stable(hits, range_ptr_compare);
      /* must be equal */
      gt_ensure(had_err, gt_array_cmp(ref, hits)==0);
      gt_array_delete(ref);
    }
    gt_array_delete(hits);
  }
  gt_interval_tree_delete(it);

  /* second tree: node data is the insertion index, nothing to free */
  it = gt_interval_tree_new(NULL);
  gt_array_reset(arr);

  /* generate test ranges */
  for (i = 0; i < num_testranges && !had_err; i++) {
    GtIntervalTreeNode *new_node;
    unsigned long start = gt_rand_max(gt_range_max_basepos);
    new_node = gt_interval_tree_node_new((void*) i, start,
                                         start + gt_rand_max(width));
    gt_interval_tree_insert(it, new_node);
  }
  gt_ensure(had_err, gt_interval_tree_size(it) == num_testranges);

  narr = gt_array_new(sizeof (GtIntervalTreeNode*));
  for (i = 0; i < num_testranges && !had_err; i++) {
    unsigned long victim_idx, n, victim_val;
    GtIntervalTreeNode *victim = NULL;

    /* get all nodes referenced by the interval tree */
    interval_tree_find_all_internal(it, it->root, itree_test_get_node, 0,
                                    gt_range_max_basepos+width, narr);

    /* remove a random node */
    victim_idx = gt_rand_max(gt_array_size(narr)-1);
    victim = *(GtIntervalTreeNode**) gt_array_get(narr, victim_idx);
    gt_ensure(had_err, victim != NULL);
    victim_val = (unsigned long) gt_interval_tree_node_get_data(victim);
    gt_interval_tree_remove(it, victim);
    gt_array_reset(narr);

    /* make sure that the node has disappeared */
    gt_ensure(had_err, gt_interval_tree_size(it) == num_testranges - (i+1));
    interval_tree_find_all_internal(it, it->root, itree_test_get_node, 0,
                                    gt_range_max_basepos+width, narr);
    gt_ensure(had_err, gt_array_size(narr) == num_testranges - (i+1));
    n = 0;
    while (!had_err && n < gt_array_size(narr)) {
      GtIntervalTreeNode *onode = *(GtIntervalTreeNode**) gt_array_get(narr,
                                                                       n);
      gt_ensure(had_err,
                (unsigned long) gt_interval_tree_node_get_data(onode)
                  != victim_val);
      n++;
    }
  }

  gt_array_delete(arr);
  gt_array_delete(narr);
  gt_interval_tree_delete(it);
  return had_err;
}
/*
  Tool runner: loads the encoded sequence named by argv[parsed_args] and
  feeds its kmers, in intervals of random length, either into a hash map
  (arguments->use_hash) or into a GtKmerDatabase.

  Unless -merge_only, -use_hash or -bench is active, a second database
  <compare_db> is filled kmer by kmer and finally compared against the
  interval-wise filled <db>. With arguments->bench, consistency checks are
  skipped and 50000000 random lookups are timed after construction.

  Returns 0 on success and a non-zero value with <err> set otherwise.
  Failure to open the print file is reported as an error, and the benchmark
  lookups are only performed if everything before them succeeded.
*/
static int gt_kmer_database_runner(GT_UNUSED int argc, const char **argv,
                                   int parsed_args, void *tool_arguments,
                                   GtError *err)
{
  GtKmerDatabaseArguments *arguments = tool_arguments;
  int had_err = 0;
  GtEncseq *es = NULL; /* stays NULL if an error occurs before loading */
  GtUword es_length = 0,
          nu_kmer_codes = 0;
  GtKmerDatabase *compare_db = NULL,
                 *db = NULL;
  GtLogger *logger;
  FILE *fp = NULL;
  GtHashmap *kmer_hash = NULL;
  GtTimer *timer = NULL;

  gt_error_check(err);
  gt_assert(arguments);

  if (arguments->use_hash)
    kmer_hash = gt_hashmap_new(GT_HASH_DIRECT, NULL,
                               (GtFree) gt_kmer_database_delete_hash_value);
  if (arguments->bench)
    timer = gt_timer_new_with_progress_description("loading encoded sequence");

  logger = gt_logger_new(arguments->verbose, GT_LOGGER_DEFLT_PREFIX, stderr);

  if (arguments->verbose && gt_str_length(arguments->print_filename) > 0UL) {
    fp = gt_fa_fopen(gt_str_get(arguments->print_filename), "w", err);
    if (fp == NULL)
      had_err = -1; /* err was set by gt_fa_fopen(); do not continue */
    else
      gt_logger_set_target(logger, fp);
  }

  if (!had_err) {
    GtEncseqLoader *es_l;
    if (arguments->bench)
      gt_timer_start(timer);
    es_l = gt_encseq_loader_new();
    es = gt_encseq_loader_load(es_l, argv[parsed_args], err);
    if (arguments->bench)
      gt_timer_show_progress(timer, "saving kmers (+iterating over file)",
                             stdout);
    if (es == NULL) {
      had_err = -1;
    }
    gt_encseq_loader_delete(es_l);
  }

  if (!had_err) {
    es_length = gt_encseq_total_length(es);
    if (es_length < (GtUword) arguments->kmersize) {
      gt_error_set(err, "Input is too short for used kmersize. "
                        "File length: " GT_WU " kmersize: %u",
                   es_length, arguments->kmersize);
      had_err = -1;
    }
  }

  if (!had_err) {
    GtAlphabet *alphabet;
    alphabet = gt_encseq_alphabet(es);
    if (arguments->bench)
      nu_kmer_codes = gt_power_for_small_exponents(
                                            gt_alphabet_num_of_chars(alphabet),
                                            arguments->kmersize);
    if (!arguments->merge_only && !arguments->use_hash && !arguments->bench) {
      compare_db = gt_kmer_database_new(gt_alphabet_num_of_chars(alphabet),
                                        arguments->kmersize,
                                        arguments->sb_size, es);
    }
    if (!arguments->use_hash) {
      db = gt_kmer_database_new(gt_alphabet_num_of_chars(alphabet),
                                arguments->kmersize,
                                arguments->sb_size, es);
      if (arguments->cutoff) {
        if (arguments->mean_cutoff)
          gt_kmer_database_use_mean_cutoff(db, (GtUword) 2,
                                           arguments->cutoff_value);
        else
          gt_kmer_database_set_cutoff(db, arguments->cutoff_value);
        if (!arguments->prune)
          gt_kmer_database_set_prune(db);
      }
    }
  }

  if (!had_err) {
    GtUword startpos = 0,
            endpos;
    GtKmercodeiterator *iter;
    const GtKmercode *kmercode = NULL;
    iter = gt_kmercodeiterator_encseq_new(es, GT_READMODE_FORWARD,
                                          arguments->kmersize, 0);
    /* process the sequence in consecutive intervals [startpos, endpos] whose
       length is varied with gt_rand_max() */
    while (!had_err && startpos < es_length - (arguments->kmersize - 1)) {
      GtUword startpos_add_kmer = startpos;
      if (arguments->merge_only) {
        endpos = startpos + (arguments->kmersize - 1) +
                 (gt_rand_max((arguments->sb_size - 1) * 2));
        if (endpos > es_length)
          endpos = es_length;
      }
      else {
        endpos = startpos + (arguments->kmersize - 1) +
                 (gt_rand_max(arguments->sb_size - 1));
      }
      gt_kmercodeiterator_reset(iter, GT_READMODE_FORWARD, startpos);
      while ((kmercode = gt_kmercodeiterator_encseq_next(iter)) != NULL &&
             startpos_add_kmer <= endpos - (arguments->kmersize - 1)) {
        if (!arguments->merge_only && !arguments->use_hash &&
            !kmercode->definedspecialposition && !arguments->bench) {
          gt_kmer_database_add_kmer(compare_db, kmercode->code,
                                    startpos_add_kmer);
        }
        if (arguments->use_hash && !kmercode->definedspecialposition) {
          gt_kmer_database_add_to_hash(kmer_hash, kmercode->code,
                                       startpos_add_kmer);
        }
        startpos_add_kmer++;
      }
      if (!arguments->use_hash) {
        gt_kmer_database_add_interval(db, startpos, endpos);
        gt_kmer_database_print_buffer(db, logger);
        if (!arguments->bench)
          had_err = gt_kmer_database_check_consistency(db, err);
      }
      startpos = endpos + 1;
    }
    if (!arguments->use_hash) {
      gt_kmer_database_flush(db);
      gt_kmer_database_print_buffer(db, logger);
      if (!had_err && !arguments->bench)
        had_err = gt_kmer_database_check_consistency(db, err);
      if (!arguments->merge_only && !had_err && !arguments->bench)
        had_err = gt_kmer_database_check_consistency(compare_db, err);
      if (!arguments->merge_only && !arguments->bench)
        gt_kmer_database_print(compare_db, logger, true);
      if (!arguments->merge_only && !had_err && !arguments->bench)
        had_err = gt_kmer_database_compare(compare_db, db, err);
      gt_kmer_database_print(db, logger, true);
    }
    gt_kmercodeiterator_delete(iter);
  }

  if (arguments->bench) {
    /* the lookups need a completely built hash map or database; after an
       error db may be NULL and nu_kmer_codes may still be 0 */
    if (!had_err) {
      GtKmerStartpos pos;
      GtArrayGtUword *pos_hash;
      GtUword rand_access = (GtUword) 50000000,
              rand_code,
              i,
              sum = 0;
      gt_timer_show_progress(timer, "random access", stdout);
      for (i = 0; i < rand_access; i++) {
        rand_code = gt_rand_max(nu_kmer_codes - 1);
        if (arguments->use_hash) {
          pos_hash = gt_hashmap_get(kmer_hash, (const void *) rand_code);
          if (pos_hash != NULL)
            sum += pos_hash->spaceGtUword[pos_hash->nextfreeGtUword - 1];
        }
        else {
          pos = gt_kmer_database_get_startpos(db, rand_code);
          if (pos.no_positions > 0)
            sum += pos.startpos[pos.no_positions - 1];
        }
      }
      /* printing the sum keeps the lookups from being optimized away */
      printf("sum: " GT_WU "\n", sum);

      gt_timer_show_progress(timer, "", stdout);
      gt_timer_stop(timer);
    }
    gt_timer_delete(timer);
  }

  if (arguments->use_hash)
    gt_hashmap_delete(kmer_hash);
  gt_encseq_delete(es);
  if (!arguments->use_hash)
    gt_kmer_database_delete(db);
  if (!arguments->merge_only && !arguments->bench)
    gt_kmer_database_delete(compare_db);
  gt_logger_delete(logger);
  gt_fa_fclose(fp);

  return had_err;
}
/*
  Tool runner: obtains a bit vector, builds a GtCompressedBitsequence from it,
  writes that to a temporary file and reads it back.

  The bit vector is either read from arguments->filename (first the number of
  bits as unsigned long long, then the words), or generated with
  arguments->size words, each 0xAAAA... XOR-ed with either a value from
  gt_rand_max() (arguments->fill_random) or the word index.

  If arguments->check_consistency is set, every bit of the original vector is
  compared (via assertions) with the in-memory and the re-read compressed
  representation.

  Returns 0 on success and a non-zero value with <err> set otherwise; this
  includes input files that are shorter than their header announces.
*/
static int gt_compressedbits_runner(GT_UNUSED int argc,
                                    GT_UNUSED const char **argv,
                                    GT_UNUSED int parsed_args,
                                    void *tool_arguments,
                                    GtError *err)
{
  GtCompressdbitsArguments *arguments = tool_arguments;
  int had_err = 0;
  unsigned long idx;
  unsigned long long num_of_bits = 0ULL;
  GtBitsequence *bits = NULL;
  GtCompressedBitsequence *cbs = NULL,
                          *read_cbs = NULL;
  GtStr *filename = gt_str_new();
  FILE *fp = NULL;

  gt_error_check(err);
  gt_assert(arguments);
  gt_assert(argc == parsed_args);

  if (gt_option_is_set(arguments->filename_op)) {
    FILE *file = NULL;
    gt_assert(arguments->filename != NULL);

    file = gt_xfopen(gt_str_get(arguments->filename), "r");
    if ((size_t) 1 != gt_xfread(&num_of_bits,
                                sizeof (num_of_bits), (size_t) 1, file)) {
      /* a short read must not be reported without an error message */
      gt_error_set(err, "cannot read number of bits from file \"%s\"",
                   gt_str_get(arguments->filename));
      had_err = -1;
    }
    if (!had_err) {
      gt_log_log("bits to read: %llu", num_of_bits);
      arguments->size = (unsigned long) GT_NUMOFINTSFORBITS(num_of_bits);
      bits = gt_malloc(sizeof (*bits) * arguments->size);
      if ((size_t) arguments->size != gt_xfread(bits, sizeof (*bits),
                                                (size_t) arguments->size,
                                                file)) {
        gt_error_set(err, "file \"%s\" contains fewer bits than announced "
                          "in its header",
                     gt_str_get(arguments->filename));
        had_err = -1;
      }
    }
    gt_xfclose(file);
  }
  else {
    bits = gt_calloc((size_t) arguments->size, sizeof (*bits));
    num_of_bits = (unsigned long long) (GT_INTWORDSIZE * arguments->size);

    if (arguments->fill_random) {
      for (idx = 0; idx < arguments->size; idx++) {
        bits[idx] =
          (GtBitsequence) (0xAAAAAAAAAAAAAAAAUL ^ gt_rand_max(ULONG_MAX));
      }
    }
    else {
      for (idx = 0; idx < arguments->size; idx++)
        bits[idx] = (GtBitsequence) (0xAAAAAAAAAAAAAAAAUL ^ idx);
    }
  }

  if (!had_err) {
    /* the temporary file is only needed for its name; close it again */
    fp = gt_xtmpfp(filename);
    gt_fa_xfclose(fp);
    fp = NULL;

    gt_log_log("filename: %s", gt_str_get(filename));
    gt_log_log("size in words: %lu", arguments->size);
    cbs = gt_compressed_bitsequence_new(
                            bits, arguments->samplerate,
                            (unsigned long) num_of_bits);
    gt_log_log("original size in MB: %2.3f",
               (sizeof (*bits) * arguments->size) / (1024.0 * 1024.0));
    gt_log_log("compressed size in MB: %2.3f",
               gt_compressed_bitsequence_size(cbs) / (1024.0 * 1024.0));
    gt_log_log("popcount table size thereof in MB: %2.3f",
               gt_popcount_tab_calculate_size(15U) / (1024.0 * 1024.0));
    had_err = gt_compressed_bitsequence_write(cbs, gt_str_get(filename), err);
  }
  if (!had_err)
  {
    read_cbs =
      gt_compressed_bitsequence_new_from_file(gt_str_get(filename), err);
    if (read_cbs == NULL)
      had_err = -1;
  }
  if (!had_err && bits != NULL && arguments->check_consistency) {
    for (idx = 0; (unsigned long long) idx < num_of_bits; ++idx) {
      int GT_UNUSED bit = gt_compressed_bitsequence_access(read_cbs, idx);
      int GT_UNUSED original = GT_ISIBITSET(bits, idx) ? 1 : 0;
      gt_assert(gt_compressed_bitsequence_access(cbs, idx) == bit);
      gt_assert(original == bit);
    }
  }
  gt_compressed_bitsequence_delete(cbs);
  gt_compressed_bitsequence_delete(read_cbs);
  gt_free(bits);
  gt_str_delete(filename);
  return had_err;
}