Exemplo n.º 1
0
/* Builds a GtWtree of the encseq class over <encseq>. New references to
   <encseq> and to its alphabet are taken via gt_encseq_ref() and
   gt_alphabet_ref(). A temporary plain bit array is filled by
   gt_wtree_encseq_fill_bits(), turned into a compressed bitsequence and
   freed again before the tree is returned. */
GtWtree* gt_wtree_encseq_new(GtEncseq *encseq)
{
  /* number of bits in one GtBitsequence word */
  const size_t bits_per_word = sizeof (GtBitsequence) * CHAR_BIT;
  /* sample rate handed to the compressed bitsequence */
  const unsigned int cbs_samplerate = 32U;
  GtWtree *tree = gt_wtree_create(gt_wtree_encseq_class());
  GtWtreeEncseq *we = gt_wtree_encseq_cast(tree);

  we->encseq = gt_encseq_ref(encseq);
  we->alpha = gt_alphabet_ref(gt_encseq_alphabet(encseq));

  /* gt_alphabet_size() covers the encoded characters plus the wildcard;
     two more symbols are reserved for UNDEFCHAR and SEPARATOR */
  we->alpha_size = gt_alphabet_size(we->alpha) + 2;
  tree->members->num_of_symbols = (GtUword) we->alpha_size;

  /* tree depth: \lceil log_2(\sigma)\rceil */
  we->levels = gt_determinebitspervalue((GtUword) we->alpha_size);

  we->root_fo = gt_wtree_encseq_fill_offset_new();
  we->current_fo = we->root_fo;

  tree->members->length = gt_encseq_total_length(encseq);

  /* every level stores one bit per sequence position */
  we->num_of_bits = we->levels * tree->members->length;

  /* number of words needed to hold num_of_bits, rounded up */
  we->bits_size = we->num_of_bits / bits_per_word;
  if (we->num_of_bits % bits_per_word != 0) {
    we->bits_size++;
  }

  we->bits = gt_calloc((size_t) we->bits_size, sizeof (GtBitsequence));
  we->node_start = 0;
  gt_wtree_encseq_fill_bits(we);

  we->c_bits = gt_compressed_bitsequence_new(we->bits,
                                             cbs_samplerate,
                                             we->num_of_bits);

  /* the plain bit array is released once c_bits has been built */
  gt_free(we->bits);
  we->bits = NULL;

  return tree;
}
Exemplo n.º 2
0
/* Runner of the compressed-bits tool.

   The input bit vector is either read from the file given by
   arguments->filename (layout as read here: one unsigned long long holding
   the number of bits, followed by the GtBitsequence words) or generated in
   memory with arguments->size words. It is then compressed, written to a
   temporary file, read back from that file and, if
   arguments->check_consistency is set, compared bit by bit against both the
   in-memory compressed bitsequence and the original bits.

   Returns 0 on success and a non-zero value on failure. On the failure paths
   detected directly in this function (short read, bit count not
   representable) <err> is set to a message describing the problem. */
static int gt_compressedbits_runner(GT_UNUSED int argc,
                                    GT_UNUSED const char **argv,
                                    GT_UNUSED int parsed_args,
                                    void *tool_arguments,
                                    GtError *err)
{
  GtCompressdbitsArguments *arguments = tool_arguments;
  int had_err = 0;
  unsigned long idx;
  unsigned long long num_of_bits = 0ULL;
  GtBitsequence *bits = NULL;
  GtCompressedBitsequence *cbs = NULL, *read_cbs = NULL;
  GtStr *filename = gt_str_new();
  FILE *fp = NULL;

  gt_error_check(err);
  gt_assert(arguments);
  gt_assert(argc == parsed_args);

  if (gt_option_is_set(arguments->filename_op)) {
    FILE *file = NULL;
    gt_assert(arguments->filename != NULL);

    /* the file holds raw words, so open it in binary mode */
    file = gt_xfopen(gt_str_get(arguments->filename), "rb");
    if ((size_t) 1 != gt_xfread(&num_of_bits,
                                sizeof (num_of_bits), (size_t) 1, file)) {
      gt_error_set(err, "could not read number of bits from file \"%s\"",
                   gt_str_get(arguments->filename));
      had_err = -1;
    }
    /* num_of_bits is later narrowed to unsigned long and compared against an
       unsigned long loop index; reject values that do not survive the
       round trip instead of silently truncating them */
    if (!had_err &&
        (unsigned long long) (unsigned long) num_of_bits != num_of_bits) {
      gt_error_set(err, "number of bits (%llu) in file \"%s\" is too large",
                   num_of_bits, gt_str_get(arguments->filename));
      had_err = -1;
    }
    if (!had_err) {
      gt_log_log("bits to read: %llu", num_of_bits);
      arguments->size = (unsigned long) GT_NUMOFINTSFORBITS(num_of_bits);
      bits = gt_malloc(sizeof (*bits) * arguments->size);
      if ((size_t) arguments->size !=
          gt_xfread(bits, sizeof (*bits),
                    (size_t) arguments->size, file)) {
        gt_error_set(err, "could not read %lu words from file \"%s\"",
                     arguments->size, gt_str_get(arguments->filename));
        had_err = -1;
      }
    }
    gt_xfclose(file);
  }
  else {
    /* no input file: generate arguments->size words of test data */
    bits = gt_calloc((size_t) arguments->size, sizeof (*bits));
    num_of_bits = (unsigned long long) (GT_INTWORDSIZE * arguments->size);

    if (arguments->fill_random) {
      for (idx = 0; idx < arguments->size; idx++) {
        bits[idx] =
          (GtBitsequence) (0xAAAAAAAAAAAAAAAAUL ^ gt_rand_max(ULONG_MAX));
      }
    }
    else {
      for (idx = 0; idx < arguments->size; idx++)
        bits[idx] = (GtBitsequence) (0xAAAAAAAAAAAAAAAAUL ^ idx);
    }
  }

  if (!had_err) {
    /* only the name of the temporary file is needed, so the handle is closed
       right away.
       NOTE(review): this function never removes the temporary file; confirm
       whether leaving it behind is intended. */
    fp = gt_xtmpfp(filename);
    gt_fa_xfclose(fp);
    fp = NULL;

    gt_log_log("filename: %s", gt_str_get(filename));
    gt_log_log("size in words: %lu", arguments->size);
    cbs = gt_compressed_bitsequence_new(
                            bits, arguments->samplerate,
                            (unsigned long) num_of_bits);
    gt_log_log("original size in MB: %2.3f",
               (sizeof (*bits) * arguments->size) / (1024.0 * 1024.0));
    gt_log_log("compressed size in MB: %2.3f",
               gt_compressed_bitsequence_size(cbs) / (1024.0 * 1024.0));
    gt_log_log("popcount table size thereof in MB: %2.3f",
               gt_popcount_tab_calculate_size(15U) / (1024.0 * 1024.0));
    had_err = gt_compressed_bitsequence_write(cbs, gt_str_get(filename), err);
  }
  if (!had_err)
  {
    read_cbs =
      gt_compressed_bitsequence_new_from_file(gt_str_get(filename), err);
    if (read_cbs == NULL)
      had_err = -1;
  }
  if (!had_err && bits != NULL && arguments->check_consistency) {
    /* every bit must agree between the original words, the in-memory
       compressed bitsequence and the one read back from the file */
    for (idx = 0; (unsigned long long) idx < num_of_bits; ++idx) {
      int GT_UNUSED bit = gt_compressed_bitsequence_access(read_cbs, idx);
      int GT_UNUSED original = GT_ISIBITSET(bits, idx) ? 1 : 0;
      gt_assert(gt_compressed_bitsequence_access(cbs, idx) == bit);
      gt_assert(original == bit);
    }
  }
  gt_compressed_bitsequence_delete(cbs);
  gt_compressed_bitsequence_delete(read_cbs);
  gt_free(bits);
  gt_str_delete(filename);
  return had_err;
}