Example #1
0
/* Returns the symbol found at position <pos> of the wavelet tree node that
   starts at offset <node_start> of <we->c_bits>, is <node_size> bits long and
   covers the symbol range <alpha_start>..<alpha_end>.
   In every step the bit at <pos> selects the lower (0) or upper (1) half of
   the symbol range, and <pos>, <node_start> and <node_size> are translated to
   the corresponding child node by rank queries. The descent ends as soon as
   the range holds a single symbol, which is then returned.
   Despite the name the descent is carried out as a loop; <pos> must be smaller
   than <node_size> for every node visited. */
static GtWtreeSymbol gt_wtree_encseq_access_rec(GtWtreeEncseq *we,
                                                GtUword pos,
                                                GtUword node_start,
                                                GtUword node_size,
                                                unsigned int alpha_start,
                                                unsigned int alpha_end)
{
  gt_assert(pos < node_size);

  while (alpha_start < alpha_end) {
    const unsigned int split = GT_DIV2(alpha_start + alpha_end);
    const GtUword abs_pos = node_start + pos,
                  node_last = node_start + node_size - 1;
    GtUword zeros_before = 0,
            zeros_in_node;
    const int bit = gt_compressed_bitsequence_access(we->c_bits, abs_pos);

    /* number of zeros preceding this node, needed to turn global ranks into
       ranks relative to the node */
    if (node_start != 0)
      zeros_before =
        gt_compressed_bitsequence_rank_0(we->c_bits, node_start - 1);
    /* the zeros of this node make up its left child */
    zeros_in_node =
      gt_compressed_bitsequence_rank_0(we->c_bits, node_last) - zeros_before;

    if (bit == 0) {
      /* rank is a count, subtract one to obtain a position */
      pos = gt_compressed_bitsequence_rank_0(we->c_bits, abs_pos) -
            zeros_before - 1;
      alpha_end = split;
      node_start += we->parent_instance.members->length;
      node_size = zeros_in_node;
    }
    else {
      GtUword ones_before = 0;

      if (node_start != 0)
        ones_before =
          gt_compressed_bitsequence_rank_1(we->c_bits, node_start - 1);
      /* rank is a count, subtract one to obtain a position */
      pos = gt_compressed_bitsequence_rank_1(we->c_bits, abs_pos) -
            ones_before - 1;
      alpha_start = split + 1;
      node_size =
        gt_compressed_bitsequence_rank_1(we->c_bits, node_last) - ones_before;
      /* the right child is placed behind the left child */
      node_start += we->parent_instance.members->length + zeros_in_node;
    }
    gt_assert(pos < node_size);
  }
  return (GtWtreeSymbol) alpha_start;
}
Example #2
0
/* Runner of the compressedbits tool.
   Obtains an uncompressed bit vector, either by reading it from
   <arguments->filename> (if that option is set) or by generating
   <arguments->size> words in memory, builds a GtCompressedBitsequence from it,
   writes that to a temporary file and reads it back again. If
   <arguments->check_consistency> is set, every bit of both compressed
   sequences is compared with the original by means of assertions.
   The input file is expected to start with the number of bits stored as
   unsigned long long, followed by the words holding these bits.
   <tool_arguments> has to point to a GtCompressdbitsArguments object; note
   that <arguments->size> is overwritten when the bits are read from a file.
   Returns 0 on success and a non-zero value otherwise. */
static int gt_compressedbits_runner(GT_UNUSED int argc,
                                    GT_UNUSED const char **argv,
                                    GT_UNUSED int parsed_args,
                                    void *tool_arguments,
                                    GtError *err)
{
  GtCompressdbitsArguments *arguments = tool_arguments;
  int had_err = 0;
  unsigned long idx;
  unsigned long long num_of_bits = 0ULL;
  GtBitsequence *bits = NULL;
  GtCompressedBitsequence *cbs = NULL, *read_cbs = NULL;
  GtStr *filename = gt_str_new();
  FILE *fp = NULL;

  gt_error_check(err);
  gt_assert(arguments);
  gt_assert(argc == parsed_args);

  if (gt_option_is_set(arguments->filename_op)) {
    FILE *file = NULL;
    gt_assert(arguments->filename != NULL);

    /* the file contains raw binary words, hence it must not be opened in text
       mode: on platforms distinguishing both modes a text stream may translate
       line endings or stop early at an end-of-file character */
    file = gt_xfopen(gt_str_get(arguments->filename), "rb");
    /* NOTE(review): both short-read branches below set had_err without
       storing a message in <err> -- confirm that callers cope with an error
       code that comes without an error message */
    if ((size_t) 1 != gt_xfread(&num_of_bits,
                                sizeof (num_of_bits), (size_t) 1, file)) {
      had_err = -1;
    }
    if (!had_err) {
      gt_log_log("bits to read: %llu", num_of_bits);
      /* number of words necessary to store num_of_bits bits */
      arguments->size = (unsigned long) GT_NUMOFINTSFORBITS(num_of_bits);
      bits = gt_malloc(sizeof (*bits) * arguments->size);
      if ((size_t) arguments->size !=
          gt_xfread(bits, sizeof (*bits),
                    (size_t) arguments->size, file)) {
        had_err = -1;
      }
    }
    gt_xfclose(file);
  }
  else {
    /* no input file given: generate <arguments->size> words of test data */
    bits = gt_calloc(sizeof (*bits), (size_t) arguments->size);
    num_of_bits = (unsigned long long) (GT_INTWORDSIZE * arguments->size);

    if (arguments->fill_random) {
      for (idx = 0; idx < arguments->size; idx++) {
        bits[idx] =
          (GtBitsequence) (0xAAAAAAAAAAAAAAAAUL ^ gt_rand_max(ULONG_MAX));
      }
    }
    else {
      /* deterministic pattern: alternating bits combined with the index */
      for (idx = 0; idx < arguments->size; idx++)
        bits[idx] = (GtBitsequence) (0xAAAAAAAAAAAAAAAAUL ^ idx);
    }
  }

  if (!had_err) {
    /* the temporary file is only opened to obtain its name in <filename>, the
       compressed bitsequence is written to that name below */
    /* NOTE(review): the temporary file is not removed in this function --
       verify whether it is cleaned up elsewhere */
    fp = gt_xtmpfp(filename);
    gt_fa_xfclose(fp);
    fp = NULL;

    gt_log_log("filename: %s", gt_str_get(filename));
    gt_log_log("size in words: %lu", arguments->size);
    cbs = gt_compressed_bitsequence_new(
                            bits, arguments->samplerate,
                            (unsigned long) num_of_bits);
    gt_log_log("original size in MB: %2.3f",
               (sizeof (*bits) * arguments->size) / (1024.0 * 1024.0));
    gt_log_log("compressed size in MB: %2.3f",
               gt_compressed_bitsequence_size(cbs) / (1024.0 * 1024.0));
    gt_log_log("popcount table size thereof in MB: %2.3f",
               gt_popcount_tab_calculate_size(15U) / (1024.0 * 1024.0));
    had_err = gt_compressed_bitsequence_write(cbs, gt_str_get(filename), err);
  }
  if (!had_err)
  {
    /* read the sequence back to exercise the (de)serialisation */
    read_cbs =
      gt_compressed_bitsequence_new_from_file(gt_str_get(filename), err);
    if (read_cbs == NULL)
      had_err = -1;
  }
  if (!had_err && bits != NULL && arguments->check_consistency) {
    /* the comparison relies on gt_assert() only, the variables are marked
       GT_UNUSED for builds in which the assertions expand to nothing */
    for (idx = 0; (unsigned long long) idx < num_of_bits; ++idx) {
      int GT_UNUSED bit = gt_compressed_bitsequence_access(read_cbs, idx);
      int GT_UNUSED original = GT_ISIBITSET(bits, idx) ? 1 : 0;
      gt_assert(gt_compressed_bitsequence_access(cbs, idx) == bit);
      gt_assert(original == bit);
    }
  }
  /* cbs and read_cbs may still be NULL here if an earlier step failed */
  gt_compressed_bitsequence_delete(cbs);
  gt_compressed_bitsequence_delete(read_cbs);
  gt_free(bits);
  gt_str_delete(filename);
  return had_err;
}