/*
 * Create a wavelet tree over the symbols of <encseq>.
 *
 * The tree object is created via gt_wtree_create() with the encseq class,
 * stores the results of gt_encseq_ref() / gt_alphabet_ref() for the sequence
 * and its alphabet, fills a temporary plain bit vector with one bit per
 * symbol and level, and finally replaces that vector by a compressed bit
 * sequence. The temporary vector is freed before returning.
 *
 * Returns the newly created GtWtree.
 */
GtWtree* gt_wtree_encseq_new(GtEncseq *encseq)
{
  /* sampling rate passed to the compressed bit sequence */
  const unsigned int sampling = 32U;
  /* number of bits stored in a single GtBitsequence word */
  const size_t word_bits = sizeof (GtBitsequence) * CHAR_BIT;
  GtWtree *tree = gt_wtree_create(gt_wtree_encseq_class());
  GtWtreeEncseq *self = gt_wtree_encseq_cast(tree);

  self->encseq = gt_encseq_ref(encseq);
  self->alpha = gt_alphabet_ref(gt_encseq_alphabet(encseq));

  /* gt_alphabet_size() covers the encoded characters plus the wildcard;
     UNDEFCHAR and SEPARATOR need a code as well, hence the two extra
     symbols */
  self->alpha_size = gt_alphabet_size(self->alpha) + 2;
  tree->members->num_of_symbols = (GtUword) self->alpha_size;

  /* number of tree levels: \lceil log_2(\sigma)\rceil */
  self->levels = gt_determinebitspervalue((GtUword) self->alpha_size);

  self->root_fo = gt_wtree_encseq_fill_offset_new();
  self->current_fo = self->root_fo;

  tree->members->length = gt_encseq_total_length(encseq);

  /* every level stores one bit per symbol of the sequence */
  self->num_of_bits = self->levels * tree->members->length;

  /* words needed to hold num_of_bits bits, rounded up */
  self->bits_size = self->num_of_bits / word_bits;
  if (self->num_of_bits % word_bits != 0) {
    self->bits_size++;
  }

  self->bits = gt_calloc((size_t) self->bits_size, sizeof (GtBitsequence));
  self->node_start = 0;
  gt_wtree_encseq_fill_bits(self);

  self->c_bits = gt_compressed_bitsequence_new(self->bits,
                                               sampling,
                                               self->num_of_bits);

  /* the plain bit vector is only needed to build the compressed one */
  gt_free(self->bits);
  self->bits = NULL;

  return tree;
}
/*
 * Runner of the compressed-bits tool.
 *
 * Obtains a plain bit vector either from the file given by the filename
 * option (a bit count stored as unsigned long long, followed by the
 * GtBitsequence words) or by generating <arguments->size> words (random or
 * index based pattern). The vector is compressed, written to a temporary
 * file, read back, and -- if requested -- every bit of the in-memory and
 * the re-read compressed sequence is compared against the plain vector.
 *
 * tool_arguments: pointer to the GtCompressdbitsArguments of this tool.
 * err:            receives a message whenever a non-zero value is returned
 *                 because of a short read in this function; errors of the
 *                 called write/read routines are reported by them via <err>.
 *
 * Returns 0 on success, a non-zero value otherwise.
 *
 * NOTE(review): the temporary file created by gt_xtmpfp() is not removed in
 * this function -- confirm whether that is intended.
 */
static int gt_compressedbits_runner(GT_UNUSED int argc,
                                    GT_UNUSED const char **argv,
                                    GT_UNUSED int parsed_args,
                                    void *tool_arguments,
                                    GtError *err)
{
  GtCompressdbitsArguments *arguments = tool_arguments;
  int had_err = 0;
  unsigned long idx;
  unsigned long long num_of_bits = 0ULL;
  GtBitsequence *bits = NULL;
  GtCompressedBitsequence *cbs = NULL,
                          *read_cbs = NULL;
  GtStr *filename = gt_str_new();
  FILE *fp = NULL;

  gt_error_check(err);
  gt_assert(arguments);
  gt_assert(argc == parsed_args);

  if (gt_option_is_set(arguments->filename_op)) {
    FILE *file = NULL;
    gt_assert(arguments->filename != NULL);
    /* the input is raw binary data, so open it in binary mode */
    file = gt_xfopen(gt_str_get(arguments->filename), "rb");
    if ((size_t) 1 != gt_xfread(&num_of_bits, sizeof (num_of_bits),
                                (size_t) 1, file)) {
      gt_error_set(err, "could not read number of bits from file \"%s\"",
                   gt_str_get(arguments->filename));
      had_err = -1;
    }
    if (!had_err) {
      gt_log_log("bits to read: %llu", num_of_bits);
      arguments->size = (unsigned long) GT_NUMOFINTSFORBITS(num_of_bits);
      bits = gt_malloc(sizeof (*bits) * arguments->size);
      if ((size_t) arguments->size != gt_xfread(bits, sizeof (*bits),
                                                (size_t) arguments->size,
                                                file)) {
        gt_error_set(err, "could not read %lu words of bits from file "
                     "\"%s\"", arguments->size,
                     gt_str_get(arguments->filename));
        had_err = -1;
      }
    }
    gt_xfclose(file);
  }
  else {
    /* element count first, element size second */
    bits = gt_calloc((size_t) arguments->size, sizeof (*bits));
    num_of_bits = (unsigned long long) (GT_INTWORDSIZE * arguments->size);
    if (arguments->fill_random) {
      for (idx = 0; idx < arguments->size; idx++) {
        bits[idx] =
          (GtBitsequence) (0xAAAAAAAAAAAAAAAAUL ^ gt_rand_max(ULONG_MAX));
      }
    }
    else {
      for (idx = 0; idx < arguments->size; idx++) {
        bits[idx] = (GtBitsequence) (0xAAAAAAAAAAAAAAAAUL ^ idx);
      }
    }
  }

  if (!had_err) {
    /* only the name of the temporary file is needed; it is reopened by the
       write/read routines below */
    fp = gt_xtmpfp(filename);
    gt_fa_xfclose(fp);
    fp = NULL;
    gt_log_log("filename: %s", gt_str_get(filename));
    gt_log_log("size in words: %lu", arguments->size);
    cbs = gt_compressed_bitsequence_new(bits,
                                        arguments->samplerate,
                                        (unsigned long) num_of_bits);
    gt_log_log("original size in MB: %2.3f",
               (sizeof (*bits) * arguments->size) / (1024.0 * 1024.0));
    gt_log_log("compressed size in MB: %2.3f",
               gt_compressed_bitsequence_size(cbs) / (1024.0 * 1024.0));
    gt_log_log("popcount table size thereof in MB: %2.3f",
               gt_popcount_tab_calculate_size(15U) / (1024.0 * 1024.0));
    had_err = gt_compressed_bitsequence_write(cbs, gt_str_get(filename), err);
  }

  if (!had_err) {
    read_cbs =
      gt_compressed_bitsequence_new_from_file(gt_str_get(filename), err);
    if (read_cbs == NULL)
      had_err = -1;
  }

  if (!had_err && bits != NULL && arguments->check_consistency) {
    /* compare plain vector, in-memory compressed and re-read compressed
       sequence bit by bit; the locals are only used inside gt_assert(),
       hence GT_UNUSED */
    for (idx = 0; (unsigned long long) idx < num_of_bits; ++idx) {
      int GT_UNUSED bit = gt_compressed_bitsequence_access(read_cbs, idx);
      int GT_UNUSED original = GT_ISIBITSET(bits, idx) ? 1 : 0;
      gt_assert(gt_compressed_bitsequence_access(cbs, idx) == bit);
      gt_assert(original == bit);
    }
  }

  gt_compressed_bitsequence_delete(cbs);
  gt_compressed_bitsequence_delete(read_cbs);
  gt_free(bits);
  gt_str_delete(filename);
  return had_err;
}