Пример #1
0
int gt_priority_queue_unit_test(GtError *err)
{
  int had_err = 0;
  unsigned long idx,
                maxsize = 10000UL,
                trials = 1000UL,
                *numbers = gt_malloc(sizeof *numbers * maxsize),
                *numbers_copy = gt_malloc(sizeof *numbers_copy * maxsize);

  unsigned long arr[] = {76UL, 132UL, 136UL, 538UL, 545UL, 401UL};

  gt_error_check (err);
  gt_priority_sort(arr,(unsigned long) sizeof arr/sizeof arr[0]);
  for (idx = 0; idx < trials; idx++)
  {
    unsigned long j,
                  size = gt_rand_max(maxsize),
                  maximal_value = 1 + gt_rand_max(1000UL);
    GtPriorityQueue *pq = gt_priority_queue_new(cmpUlong,size);
    void *elem;

    for (j = 0; j< size; j++)
    {
      numbers_copy[j] = numbers[j] = gt_rand_max(maximal_value);
      gt_priority_queue_add(pq, numbers_copy + j);
    }
    gt_ensure(had_err,gt_priority_queue_is_full(pq));
    qsort(numbers,(size_t) size,sizeof *numbers,cmpUlong);
    for (j = 0; j < size; j++)
    {
      elem = gt_priority_queue_extract_min(pq);
      if (*((unsigned long *) elem) != numbers[j])
      {
        fprintf(stderr,"elem=%lu != %lu = numbers[%lu]\n",
                      *((unsigned long *) elem),numbers[j],j);
        exit(EXIT_FAILURE);
      }
      gt_ensure(had_err,*((unsigned long *) elem) == numbers[j]);
    }
    gt_ensure(had_err,gt_priority_queue_is_empty(pq));
    gt_priority_queue_delete(pq);
  }
  gt_free(numbers);
  gt_free(numbers_copy);
  if (had_err == -1)
  {
    exit(EXIT_FAILURE);
  }
  return had_err;
}
Пример #2
0
static int gt_lua_mathsupport_rand_max(lua_State *L)
{
  GtUword max = luaL_checknumber(L, 1);

  lua_pushnumber(L, gt_rand_max(max));
  return 1;
}
Пример #3
0
char gt_rand_char(void)
{
  int offset;
  char c;
  offset = gt_rand_max(25);
  c = 97 + offset;
  gt_assert(c >= 'a' && c <= 'z');
  return c;
}
Пример #4
0
static void gt_seqorder_get_shuffled_seqnums(unsigned long nofseqs,
    unsigned long *seqnums)
{
  unsigned long i, j;

  gt_assert(seqnums != NULL);
  seqnums[0] = 0;
  for (i = 1UL; i < nofseqs; i++)
  {
    j = gt_rand_max(i);
    seqnums[i] = seqnums[j];
    seqnums[j] = i;
  }
}
Пример #5
0
static void gt_seqorder_get_shuffled_seqnums(GtUword nofseqs,
    GtUword *seqnums)
{
  GtUword i, j;

  gt_assert(seqnums != NULL);
  seqnums[0] = 0;
  for (i = 1UL; i < nofseqs; i++)
  {
    j = gt_rand_max(i);
    seqnums[i] = seqnums[j];
    seqnums[j] = i;
  }
}
Пример #6
0
static void* test_symbol(GT_UNUSED void *data)
{
  GtStr *symbol;
  GtUword i;
  symbol = gt_str_new();
  for (i = 0; i < NUMBER_OF_SYMBOLS; i++) {
    gt_str_reset(symbol);
    gt_str_append_ulong(symbol, gt_rand_max(MAX_SYMBOL));
    gt_symbol(gt_str_get(symbol));
    gt_assert(!strcmp(gt_symbol(gt_str_get(symbol)), gt_str_get(symbol)));
  }
  gt_str_delete(symbol);
  return NULL;
}
static int gt_compreads_decompress_benchmark(GtHcrDecoder *hcrd,
                                             unsigned long amount,
                                             GtTimer *timer,
                                             GtError *err) {
  char qual[BUFSIZ] = {0},
       seq[BUFSIZ] = {0};
  int had_err = 0;
  unsigned long rand,
                max_rand = gt_hcr_decoder_num_of_reads(hcrd) - 1,
                count;

  GtStr *timer_comment = gt_str_new_cstr("extracting ");
  GtStr *desc = gt_str_new();

  gt_str_append_ulong(timer_comment, amount);
  gt_str_append_cstr(timer_comment, " reads of ");
  gt_str_append_ulong(timer_comment, max_rand + 1);
  gt_str_append_cstr(timer_comment, "!");

  if (timer == NULL) {
    timer = gt_timer_new_with_progress_description("extract random reads");
    gt_timer_start(timer);
  }
  else {
    gt_timer_show_progress(timer, "extract random reads", stdout);
  }

  gt_log_log("%s",gt_str_get(timer_comment));
  for (count = 0; count < amount; count++) {
    if (!had_err) {
      rand = gt_rand_max(max_rand);
      gt_log_log("get read: %lu", rand);
      had_err = gt_hcr_decoder_decode(hcrd, rand, seq, qual, desc, err);
      gt_log_log("%s",gt_str_get(desc));
      gt_log_log("%s",seq);
      gt_log_log("%s",qual);
    }
  }
  gt_str_delete(timer_comment);
  gt_str_delete(desc);
  if (!gt_showtime_enabled())
    gt_timer_delete(timer);
  return had_err;
}
Пример #8
0
static char* generate_fragment(GtShredder *shredder,
                               unsigned long *fragment_length,
                               GtStr *desc)
{
  gt_assert(shredder && fragment_length);
  if (shredder->seqnum < gt_bioseq_number_of_sequences(shredder->bioseq)) {
    unsigned long seqlen, fraglen;
    char *frag;
    seqlen = gt_bioseq_get_sequence_length(shredder->bioseq, shredder->seqnum);
    fraglen = (shredder->maxlength == shredder->minlength
               ? 0 : gt_rand_max(shredder->maxlength - shredder->minlength))
              + shredder->minlength;
    gt_assert(fraglen >= shredder->minlength);
    if (shredder->pos + fraglen > seqlen)
      fraglen = seqlen - shredder->pos;
    *fragment_length = fraglen;
    gt_str_reset(desc);
    gt_str_append_cstr(desc, gt_bioseq_get_description(shredder->bioseq,
                                                       shredder->seqnum));
    gt_assert(shredder->pos + fraglen <= seqlen);
    frag = gt_bioseq_get_sequence_range(shredder->bioseq, shredder->seqnum,
                                        shredder->pos,
                                        shredder->pos + fraglen -1);
    if (shredder->pos + fraglen == seqlen) { /* last fragment */
      shredder->seqnum++;
      shredder->pos = 0;
    }
    else {
      if (fraglen > shredder->overlap)
        shredder->pos += fraglen - shredder->overlap;
      else
        shredder->pos++; /* go at least one base further each step */
    }
    return frag;
  }
  return NULL;
}
Пример #9
0
int gt_intset_16_unit_test(GtError *err)
{
  int had_err = 0;
  GtIntset *is;
  GtUword num_of_elems = gt_rand_max(((GtUword) 1) << 10) + 1,
          *arr = gt_malloc(sizeof (*arr) * num_of_elems),
          stepsize = GT_DIV2(num_of_elems <<4 / num_of_elems),
          idx;
  size_t is_size;

  gt_error_check(err);

  arr[0] = gt_rand_max(stepsize) + 1;
  for (idx = (GtUword) 1; idx < num_of_elems; ++idx) {
    arr[idx] = arr[idx - 1] + gt_rand_max(stepsize) + 1;
  }

  is_size =     gt_intset_16_size_of_rep(arr[num_of_elems - 1], num_of_elems);

  if (!had_err) {
    if (is_size < (size_t) UINT_MAX) {
      is = gt_intset_16_new(arr[num_of_elems - 1], num_of_elems);
      for (idx = 0; idx < num_of_elems; idx++) {
        gt_intset_16_add(is, arr[idx]);
        gt_ensure(idx + 1 == gt_intset_16_size(is));
        if (idx < num_of_elems - 1)
          gt_ensure(gt_intset_16_get_idx_smallest_geq(is,
                                                      arr[idx] + 1) ==
                    num_of_elems);
      }

      gt_ensure(gt_intset_16_elems_is_valid(is));
      gt_ensure(gt_intset_16_secstart_is_valid(is));

      for (idx = 0; !had_err && idx < num_of_elems; idx++) {
        if (arr[idx] != 0 && arr[idx - 1] != (arr[idx] - 1)) {
          gt_ensure(
            gt_intset_16_get_idx_smallest_geq_test(is, arr[idx] - 1) ==
            idx);
          gt_ensure(
            gt_intset_16_get_idx_smallest_geq(is, arr[idx] - 1) ==
            idx);
        }
        gt_ensure(gt_intset_16_get_test(is, idx) == arr[idx]);
        gt_ensure(gt_intset_16_get(is, idx) == arr[idx]);
        gt_ensure(
          gt_intset_16_get_idx_smallest_geq_test(is, arr[idx] + 1) ==
          idx + 1);
        gt_ensure(
          gt_intset_16_get_idx_smallest_geq(is, arr[idx] + 1) ==
          idx + 1);
      }
      if (!had_err)
        had_err = gt_intset_unit_test_notinset(is, 0, arr[0] - 1, err);
      if (!had_err)
        had_err = gt_intset_unit_test_check_seqnum(is, 0, arr[0] - 1, 0, err);
      for (idx = (GtUword) 1; !had_err && idx < num_of_elems; idx++) {
        had_err = gt_intset_unit_test_notinset(is, arr[idx - 1] + 1,
                                               arr[idx] - 1, err);
        if (!had_err)
          had_err = gt_intset_unit_test_check_seqnum(is, arr[idx - 1] + 1,
                                                     arr[idx] - 1, idx, err);
      }
      gt_intset_delete(is);
    }
  }
  gt_free(arr);
  return had_err;
}
Пример #10
0
int gt_string_matching_unit_test(GtError *err)
{
  char s[STRING_MATCHING_MAX_STRING_LENGTH+1],
       p[STRING_MATCHING_MAX_PATTERN_LENGTH+1], *text = "foo";
  GtArray *brute_force_matches,
        *bmh_matches,
        *kmp_matches,
        *shift_and_matches;
  unsigned long i, brute_force_match, bmh_match, kmp_match, shift_and_match;
  int had_err = 0;

  gt_error_check(err);

  brute_force_matches = gt_array_new(sizeof (unsigned long));
  bmh_matches = gt_array_new(sizeof (unsigned long));
  kmp_matches = gt_array_new(sizeof (unsigned long));
  shift_and_matches = gt_array_new(sizeof (unsigned long));

  /* match the empty pattern */
  gt_string_matching_brute_force(text, strlen(text), "", 0, store_match,
                              brute_force_matches);
  gt_string_matching_bmh(text, strlen(text), "", 0, store_match, bmh_matches);
  gt_string_matching_kmp(text, strlen(text), "", 0, store_match, kmp_matches);
  gt_string_matching_shift_and(text, strlen(text), "", 0, store_match,
                            shift_and_matches);

  ensure(had_err, !gt_array_size(brute_force_matches));
  ensure(had_err, !gt_array_size(bmh_matches));
  ensure(had_err, !gt_array_size(kmp_matches));
  ensure(had_err, !gt_array_size(shift_and_matches));

  for (i = 0; !had_err && i < STRING_MATCHING_NUM_OF_TESTS; i++) {
    unsigned long j, n, m;
    /* generate random string and pattern */
    n = gt_rand_max(STRING_MATCHING_MAX_STRING_LENGTH);
    m = gt_rand_max(STRING_MATCHING_MAX_PATTERN_LENGTH);
    for (j = 0; j < n; j++)
      s[j] = gt_rand_char();
    s[n] = '\0';
    for (j = 0; j < m; j++)
      p[j] = gt_rand_char();
    p[m] = '\0';
    /* matching (first match) */
    brute_force_match = GT_UNDEF_ULONG;
    bmh_match = GT_UNDEF_ULONG;
    kmp_match = GT_UNDEF_ULONG;
    shift_and_match = GT_UNDEF_ULONG;
    gt_string_matching_brute_force(s, n, p, m, store_first_match,
                                &brute_force_match);
    gt_string_matching_bmh(s, n, p, m, store_first_match, &bmh_match);
    gt_string_matching_kmp(s, n, p, m, store_first_match, &kmp_match);
    gt_string_matching_shift_and(s, n, p, m, store_first_match,
                                 &shift_and_match);
    /* comparing (first match) */
    ensure(had_err, brute_force_match == bmh_match);
    ensure(had_err, brute_force_match == kmp_match);
    ensure(had_err, brute_force_match == shift_and_match);
    /* matching (all matches) */
    gt_string_matching_brute_force(s, n, p, m, store_match,
                                   brute_force_matches);
    gt_string_matching_bmh(s, n, p, m, store_match, bmh_matches);
    gt_string_matching_kmp(s, n, p, m, store_match, kmp_matches);
    gt_string_matching_shift_and(s, n, p, m, store_match, shift_and_matches);
    /* comparing (all matches) */
    ensure(had_err, gt_array_size(brute_force_matches) ==
                    gt_array_size(bmh_matches));
    ensure(had_err, gt_array_size(brute_force_matches) ==
                    gt_array_size(kmp_matches));
    ensure(had_err, gt_array_size(brute_force_matches) ==
                    gt_array_size(shift_and_matches));
    ensure(had_err, !gt_array_cmp(brute_force_matches, bmh_matches));
    ensure(had_err, !gt_array_cmp(brute_force_matches, kmp_matches));
    ensure(had_err, !gt_array_cmp(brute_force_matches, shift_and_matches));
    /* reset */
    gt_array_reset(brute_force_matches);
    gt_array_reset(bmh_matches);
    gt_array_reset(kmp_matches);
    gt_array_reset(shift_and_matches);
  }

  gt_array_delete(shift_and_matches);
  gt_array_delete(bmh_matches);
  gt_array_delete(kmp_matches);
  gt_array_delete(brute_force_matches);

  return had_err;
}
Пример #11
0
int gt_dlist_unit_test(GtError *err)
{
  GtDlist *dlist;
  GtDlistelem *dlistelem;
  int i, j, size, *data,
      elem_a = 7,
      elem_b = 6,
      elems[MAX_SIZE],
      elems_backup[MAX_SIZE],
      had_err = 0;
  gt_error_check(err);

  /* boundary case: empty dlist */
  dlist = gt_dlist_new(intcompare);
  ensure(had_err, !gt_dlist_size(dlist));
  gt_dlist_delete(dlist);

  dlist = gt_dlist_new(NULL);
  ensure(had_err, !gt_dlist_size(dlist));
  gt_dlist_delete(dlist);

  /* boundary case: dlist containing one element */
  dlist = gt_dlist_new(intcompare);
  gt_dlist_add(dlist, &elem_a);
  ensure(had_err, gt_dlist_size(dlist) == 1);
  ensure(had_err,
         elem_a == *(int*) gt_dlistelem_get_data(gt_dlist_first(dlist)));
  gt_dlist_delete(dlist);

  dlist = gt_dlist_new(NULL);
  gt_dlist_add(dlist, &elem_a);
  ensure(had_err, gt_dlist_size(dlist) == 1);
  ensure(had_err,
         elem_a == *(int*) gt_dlistelem_get_data(gt_dlist_first(dlist)));
  gt_dlist_delete(dlist);

  /* boundary case: dlist containing two elements */
  dlist = gt_dlist_new(intcompare);
  gt_dlist_add(dlist, &elem_a);
  gt_dlist_add(dlist, &elem_b);
  ensure(had_err, gt_dlist_size(dlist) == 2);
  ensure(had_err,
         elem_b == *(int*) gt_dlistelem_get_data(gt_dlist_first(dlist)));
  gt_dlist_delete(dlist);

  dlist = gt_dlist_new(NULL);
  gt_dlist_add(dlist, &elem_a);
  gt_dlist_add(dlist, &elem_b);
  ensure(had_err, gt_dlist_size(dlist) == 2);
  ensure(had_err,
         elem_a == *(int*) gt_dlistelem_get_data(gt_dlist_first(dlist)));
  gt_dlist_delete(dlist);

  for (i = 0; i < NUM_OF_TESTS && !had_err; i++) {
    /* construct the random elements for the list */
    size = gt_rand_max(MAX_SIZE);
    for (j = 0; j < size; j++) {
      elems[j] = gt_rand_max(INT_MAX);
      elems_backup[j] = elems[j];
    }

    /* sort the backup elements */
    qsort(elems_backup, size, sizeof (int), intcompare);

    /* test with compare function */
    dlist = gt_dlist_new(intcompare);
    ensure(had_err, !gt_dlist_size(dlist));
    for (j = 0; j < size && !had_err; j++) {
      gt_dlist_add(dlist, elems + j);
      ensure(had_err, gt_dlist_size(dlist) == j+1);

      for (dlistelem = gt_dlist_first(dlist); dlistelem != NULL;
           dlistelem = gt_dlistelem_next(dlistelem)) {
      }
    }
    j = 0;
    for (dlistelem = gt_dlist_first(dlist); dlistelem != NULL;
         dlistelem = gt_dlistelem_next(dlistelem)) {
      data = gt_dlistelem_get_data(dlistelem);
      ensure(had_err, *data == elems_backup[j]);
      j++;
    }
    /* test gt_dlist_find() */
    for (j = 0; j < size; j++) {
      dlistelem = gt_dlist_find(dlist, elems_backup + j);
      ensure(had_err, dlistelem);
      ensure(had_err,
             *(int*) gt_dlistelem_get_data(dlistelem) == elems_backup[j]);
    }
    /* remove first element */
    if (gt_dlist_size(dlist)) {
      gt_dlist_remove(dlist, gt_dlist_first(dlist));
      if (gt_dlist_size(dlist)) {
        data = gt_dlistelem_get_data(gt_dlist_first(dlist));
        ensure(had_err, *data == elems_backup[1]);
      }
    }
    /* remove last element */
    if (gt_dlist_size(dlist)) {
      gt_dlist_remove(dlist, gt_dlist_last(dlist));
      if (gt_dlist_size(dlist)) {
        data = gt_dlistelem_get_data(gt_dlist_last(dlist));
        ensure(had_err, *data == elems_backup[size - 2]);
      }
    }
    /* XXX: fix this */
#if 0
    /* remove middle element */
    if (gt_dlist_size(dlist) >= 2) {
      dlistelem = gt_dlist_first(dlist);
      for (j = 1; j < gt_dlist_size(dlist) / 2; j++)
        dlistelem = gt_dlistelem_next(dlistelem);
      gt_dlist_remove(dlist, dlistelem);
      dlistelem = gt_dlist_first(dlist);
      for (j = 1; j < gt_dlist_size(dlist) / 2 + 1; j++)
        dlistelem = gt_dlistelem_next(dlistelem);
      data = gt_dlistelem_get_data(gt_dlist_last(dlist));
      ensure(had_err, *data == elems_backup[size / 2 + 1]);
    }
#endif
    gt_dlist_delete(dlist);

    /* test without compare function */
    dlist = gt_dlist_new(NULL);
    ensure(had_err, !gt_dlist_size(dlist));
    for (j = 0; j < size && !had_err; j++) {
      gt_dlist_add(dlist, elems + j);
      ensure(had_err, gt_dlist_size(dlist) == j+1);
    }
    j = 0;
    for (dlistelem = gt_dlist_first(dlist); dlistelem != NULL;
         dlistelem = gt_dlistelem_next(dlistelem)) {
      data = gt_dlistelem_get_data(dlistelem);
      ensure(had_err, *data == elems[j]);
      j++;
    }
    /* remove first element */
    if (gt_dlist_size(dlist)) {
      gt_dlist_remove(dlist, gt_dlist_first(dlist));
      if (gt_dlist_size(dlist)) {
        data = gt_dlistelem_get_data(gt_dlist_first(dlist));
        ensure(had_err, *data == elems[1]);
      }
    }
    /* remove last element */
    if (gt_dlist_size(dlist)) {
      gt_dlist_remove(dlist, gt_dlist_last(dlist));
      if (gt_dlist_size(dlist)) {
        data = gt_dlistelem_get_data(gt_dlist_last(dlist));
        ensure(had_err, *data == elems[size - 2]);
      }
    }
    gt_dlist_delete(dlist);
  }

  return had_err;
}
Пример #12
0
int gt_interval_tree_unit_test(GT_UNUSED GtError *err)
{
  GtIntervalTree *it = NULL;
  GtIntervalTreeNode *res = NULL;
  unsigned long i = 0;
  int had_err = 0, num_testranges = 3000,
      num_samples = 300000, num_find_all_samples = 10000,
      gt_range_max_basepos = 90000, width = 700,
      query_width = 5000;
  GtRange *res_rng = NULL, qrange;
  GtArray *arr = NULL, *narr = NULL;

  arr = gt_array_new(sizeof (GtRange*));

  /* generate test ranges */
  for (i = 0;i<num_testranges;i++)
  {
    unsigned long start;
    GtRange *rng;
    rng  = gt_calloc(1, sizeof (GtRange));
    start = gt_rand_max(gt_range_max_basepos);
    rng->start = start;
    rng->end = start + gt_rand_max(width);
    gt_array_add(arr, rng);
  }

  it = gt_interval_tree_new(gt_free_func);

  /* insert ranges */
  for (i = 0; i < num_testranges && !had_err; i++)
  {
    GtIntervalTreeNode *new_node;
    GtRange *rng;
    rng = *(GtRange**) gt_array_get(arr, i);
    new_node = gt_interval_tree_node_new(rng, rng->start, rng->end);
    gt_interval_tree_insert(it, new_node);
  }
  gt_ensure(had_err, gt_interval_tree_size(it) == num_testranges);

  /* perform test queries */
  for (i = 0; i < num_samples && !had_err; i++)
  {
    unsigned long start = gt_rand_max(gt_range_max_basepos);
    qrange.start = start;
    qrange.end = start + gt_rand_max(width);
    res = gt_interval_tree_find_first_overlapping(it, qrange.start, qrange.end);
    if (res)
    {
      /* we have a hit, check if really overlapping */
      res_rng = (GtRange*) gt_interval_tree_node_get_data(res);
      gt_ensure(had_err, gt_range_overlap(&qrange, res_rng));
    } else {
      /* no hit, check whether there really is no overlapping
         interval in tree */
      GtRange *this_rng;
      unsigned long j;
      bool found = false;
      for (j = 0; j < gt_array_size(arr); j++)
      {
        this_rng = *(GtRange**) gt_array_get(arr, j);
        if (gt_range_overlap(this_rng, &qrange))
        {
          found = true;
          break;
        }
      }
      gt_ensure(had_err, !found);
    }
  }

  /* test searching for all overlapping intervals */
  for (i = 0; i < num_find_all_samples && !had_err; i++)
  {
    unsigned long start = gt_rand_max(gt_range_max_basepos);
    qrange.start = start;
    qrange.end = start + gt_rand_max(query_width);
    GtArray *res = gt_array_new(sizeof (GtRange*));
    gt_interval_tree_find_all_overlapping(it, qrange.start, qrange.end, res);
    if (res)
    {
      /* generate reference overlapping interval list by linear search */
      GtArray *ref;
      unsigned long j;
      ref = gt_array_new(sizeof (GtRange*));
      for (j = 0; j < gt_array_size(arr); j++)
      {
        GtRange *this_rng;
        this_rng = *(GtRange**) gt_array_get(arr, j);
        if (gt_range_overlap(this_rng, &qrange))
        {
          gt_array_add(ref, this_rng);
        }
      }
      /* compare reference with interval tree query result */
      gt_array_sort_stable(ref, range_ptr_compare);
      gt_array_sort_stable(res, range_ptr_compare);
      /* must be equal */
      gt_ensure(had_err, gt_array_cmp(ref, res)==0);
      gt_array_delete(ref);
    }
    gt_array_delete(res);
  }
  gt_interval_tree_delete(it);

  it = gt_interval_tree_new(NULL);
  gt_array_reset(arr);

  /* generate test ranges */
  for (i = 0;i<num_testranges && !had_err;i++)
  {
    unsigned long start;
    GtIntervalTreeNode *new_node;
    start = gt_rand_max(gt_range_max_basepos);
    new_node = gt_interval_tree_node_new((void*) i, start,
                                          start + gt_rand_max(width));
    gt_interval_tree_insert(it, new_node);
  }
  gt_ensure(had_err, gt_interval_tree_size(it) == num_testranges);

  narr = gt_array_new(sizeof (GtIntervalTreeNode*));
  for (i = 0; i < num_testranges && !had_err; i++) {
    unsigned long idx, n, val;
    GtIntervalTreeNode *node = NULL;

    /* get all nodes referenced by the interval tree */
    interval_tree_find_all_internal(it, it->root, itree_test_get_node, 0,
                                    gt_range_max_basepos+width, narr);

    /* remove a random node */
    idx = gt_rand_max(gt_array_size(narr)-1);
    node = *(GtIntervalTreeNode**) gt_array_get(narr, idx);
    gt_ensure(had_err, node != NULL);
    val = (unsigned long) gt_interval_tree_node_get_data(node);
    gt_interval_tree_remove(it, node);
    gt_array_reset(narr);

    /* make sure that the node has disappeared */
    gt_ensure(had_err, gt_interval_tree_size(it) == num_testranges - (i+1));
    interval_tree_find_all_internal(it, it->root, itree_test_get_node, 0,
                                    gt_range_max_basepos+width, narr);
    gt_ensure(had_err, gt_array_size(narr) == num_testranges - (i+1));
    for (n = 0; !had_err && n < gt_array_size(narr); n++) {
      GtIntervalTreeNode *onode = *(GtIntervalTreeNode**) gt_array_get(narr, n);
      gt_ensure(had_err, (unsigned long) gt_interval_tree_node_get_data(onode)
                           != val);
    }
  }

  gt_array_delete(arr);
  gt_array_delete(narr);
  gt_interval_tree_delete(it);
  return had_err;
}
static int gt_kmer_database_runner(GT_UNUSED int argc, const char **argv,
                                   int parsed_args, void *tool_arguments,
                                   GtError *err)
{
  GtKmerDatabaseArguments *arguments = tool_arguments;
  int had_err = 0;
  GtEncseq       *es;
  GtUword        es_length,
                 nu_kmer_codes = 0;
  GtKmerDatabase *compare_db = NULL,
                 *db = NULL;
  GtLogger *logger;
  FILE *fp = NULL;
  GtHashmap *kmer_hash = NULL;
  GtTimer *timer = NULL;

  gt_error_check(err);
  gt_assert(arguments);

  if (arguments->use_hash)
    kmer_hash = gt_hashmap_new(GT_HASH_DIRECT, NULL,
                               (GtFree) gt_kmer_database_delete_hash_value);
  if (arguments->bench)
    timer = gt_timer_new_with_progress_description("loading encoded sequence");

  logger = gt_logger_new(arguments->verbose, GT_LOGGER_DEFLT_PREFIX, stderr);

  if (arguments->verbose && gt_str_length(arguments->print_filename) > 0UL) {
    fp = gt_fa_fopen(gt_str_get(arguments->print_filename), "w", err);
    gt_logger_set_target(logger, fp);
  }

  if (!had_err) {
    GtEncseqLoader *es_l;
    if (arguments->bench)
      gt_timer_start(timer);
    es_l = gt_encseq_loader_new();
    es = gt_encseq_loader_load(es_l, argv[parsed_args], err);
    if (arguments->bench)
      gt_timer_show_progress(timer, "saving kmers (+iterating over file)",
                             stdout);
    if (es == NULL) {
      had_err = -1;
    }
    gt_encseq_loader_delete(es_l);
  }
  if (!had_err) {
    es_length = gt_encseq_total_length(es);
    if (es_length < (GtUword) arguments->kmersize) {
      gt_error_set(err, "Input is too short for used kmersize. File length: "
                   GT_WU " kmersize: %u", es_length, arguments->kmersize);
      had_err = -1;
    }
  }
  if (!had_err) {
    GtAlphabet *alphabet;
    alphabet = gt_encseq_alphabet(es);
    if (arguments->bench)
    nu_kmer_codes = gt_power_for_small_exponents(
                                            gt_alphabet_num_of_chars(alphabet),
                                            arguments->kmersize);
    if (!arguments->merge_only && !arguments->use_hash && !arguments->bench) {
      compare_db = gt_kmer_database_new(gt_alphabet_num_of_chars(alphabet),
                                arguments->kmersize, arguments->sb_size, es);
    }
    if (!arguments->use_hash) {
      db = gt_kmer_database_new(gt_alphabet_num_of_chars(alphabet),
                                arguments->kmersize,
                                arguments->sb_size, es);
      if (arguments->cutoff) {
        if (arguments->mean_cutoff)
          gt_kmer_database_use_mean_cutoff(db, (GtUword) 2,
                                           arguments->cutoff_value);
        else
          gt_kmer_database_set_cutoff(db, arguments->cutoff_value);
        if (!arguments->prune)
          gt_kmer_database_set_prune(db);
      }
    }
  }

  if (!had_err) {
    GtUword startpos = 0,
            endpos;
    GtKmercodeiterator *iter;
    const GtKmercode *kmercode = NULL;
    iter = gt_kmercodeiterator_encseq_new(es, GT_READMODE_FORWARD,
                                          arguments->kmersize, 0);
    while (!had_err && startpos < es_length - (arguments->kmersize - 1)) {
      GtUword startpos_add_kmer = startpos;
      if (arguments->merge_only) {
        endpos = startpos + (arguments->kmersize - 1) +
                 (gt_rand_max((arguments->sb_size - 1) * 2));
        if (endpos > es_length)
          endpos = es_length;
      }
      else {
        endpos = startpos + (arguments->kmersize - 1) +
                 (gt_rand_max(arguments->sb_size - 1));
      }
      gt_kmercodeiterator_reset(iter, GT_READMODE_FORWARD, startpos);
      while ((kmercode = gt_kmercodeiterator_encseq_next(iter)) != NULL &&
             startpos_add_kmer <= endpos - (arguments->kmersize - 1)) {
        if (!arguments->merge_only && !arguments->use_hash &&
            !kmercode->definedspecialposition && !arguments->bench) {
          gt_kmer_database_add_kmer(compare_db, kmercode->code,
                                    startpos_add_kmer);
        }
        if (arguments->use_hash && !kmercode->definedspecialposition) {
          gt_kmer_database_add_to_hash(kmer_hash, kmercode->code,
                                       startpos_add_kmer);
        }
        startpos_add_kmer++;
      }
      if (!arguments->use_hash) {
        gt_kmer_database_add_interval(db, startpos, endpos);
        gt_kmer_database_print_buffer(db, logger);
        if (!arguments->bench)
          had_err = gt_kmer_database_check_consistency(db, err);
      }
      startpos = endpos + 1;
    }
    if (!arguments->use_hash) {
      gt_kmer_database_flush(db);
      gt_kmer_database_print_buffer(db, logger);
      if (!had_err && !arguments->bench)
        had_err = gt_kmer_database_check_consistency(db, err);
      if (!arguments->merge_only && !had_err && !arguments->bench)
        had_err = gt_kmer_database_check_consistency(compare_db, err);
      if (!arguments->merge_only && !arguments->bench)
        gt_kmer_database_print(compare_db, logger, true);
      if (!arguments->merge_only && !had_err && !arguments->bench)
        had_err = gt_kmer_database_compare(compare_db, db, err);
      gt_kmer_database_print(db, logger, true);
    }
    gt_kmercodeiterator_delete(iter);
  }

  if (arguments->bench) {
    GtKmerStartpos pos;
    GtArrayGtUword *pos_hash;
    GtUword rand_access = (GtUword) 50000000,
            rand_code,
            i,
            sum = 0;
    gt_timer_show_progress(timer, "random access", stdout);
    for (i = 0; i < rand_access; i++) {
      rand_code = gt_rand_max(nu_kmer_codes - 1);
      if (arguments->use_hash) {
        pos_hash = gt_hashmap_get(kmer_hash, (const void *) rand_code);
        if (pos_hash != NULL)
          sum += pos_hash->spaceGtUword[pos_hash->nextfreeGtUword - 1];
      }
      else {
        pos = gt_kmer_database_get_startpos(db, rand_code);
        if (pos.no_positions > 0)
          sum += pos.startpos[pos.no_positions - 1];
      }
    }
    printf("sum: " GT_WU "\n", sum);

    gt_timer_show_progress(timer, "", stdout);
    gt_timer_stop(timer);
    gt_timer_delete(timer);
  }
  if (arguments->use_hash)
    gt_hashmap_delete(kmer_hash);
  gt_encseq_delete(es);
  if (!arguments->use_hash)
    gt_kmer_database_delete(db);
  if (!arguments->merge_only && !arguments->bench)
    gt_kmer_database_delete(compare_db);
  gt_logger_delete(logger);
  gt_fa_fclose(fp);

  return had_err;
}
Пример #14
0
static int gt_compressedbits_runner(GT_UNUSED int argc,
                                    GT_UNUSED const char **argv,
                                    GT_UNUSED int parsed_args,
                                    void *tool_arguments,
                                    GtError *err)
{
  GtCompressdbitsArguments *arguments = tool_arguments;
  int had_err = 0;
  unsigned long idx;
  unsigned long long num_of_bits = 0ULL;
  GtBitsequence *bits = NULL;
  GtCompressedBitsequence *cbs = NULL, *read_cbs = NULL;
  GtStr *filename = gt_str_new();
  FILE *fp = NULL;

  gt_error_check(err);
  gt_assert(arguments);
  gt_assert(argc == parsed_args);

  if (gt_option_is_set(arguments->filename_op)) {
    FILE *file = NULL;
    gt_assert(arguments->filename != NULL);

    file = gt_xfopen(gt_str_get(arguments->filename), "r");
    if ((size_t) 1 != gt_xfread(&num_of_bits,
                                sizeof (num_of_bits), (size_t) 1, file)) {
      had_err = -1;
    }
    if (!had_err) {
      gt_log_log("bits to read: %llu", num_of_bits);
      arguments->size = (unsigned long) GT_NUMOFINTSFORBITS(num_of_bits);
      bits = gt_malloc(sizeof (*bits) * arguments->size);
      if ((size_t) arguments->size !=
          gt_xfread(bits, sizeof (*bits),
                    (size_t) arguments->size, file)) {
        had_err = -1;
      }
    }
    gt_xfclose(file);
  }
  else {
    bits = gt_calloc(sizeof (*bits), (size_t) arguments->size);
    num_of_bits = (unsigned long long) (GT_INTWORDSIZE * arguments->size);

    if (arguments->fill_random) {
      for (idx = 0; idx < arguments->size; idx++) {
        bits[idx] =
          (GtBitsequence) (0xAAAAAAAAAAAAAAAAUL ^ gt_rand_max(ULONG_MAX));
      }
    }
    else {
      for (idx = 0; idx < arguments->size; idx++)
        bits[idx] = (GtBitsequence) (0xAAAAAAAAAAAAAAAAUL ^ idx);
    }
  }

  if (!had_err) {
    fp = gt_xtmpfp(filename);
    gt_fa_xfclose(fp);
    fp = NULL;

    gt_log_log("filename: %s", gt_str_get(filename));
    gt_log_log("size in words: %lu", arguments->size);
    cbs = gt_compressed_bitsequence_new(
                            bits, arguments->samplerate,
                            (unsigned long) num_of_bits);
    gt_log_log("original size in MB: %2.3f",
               (sizeof (*bits) * arguments->size) / (1024.0 * 1024.0));
    gt_log_log("compressed size in MB: %2.3f",
               gt_compressed_bitsequence_size(cbs) / (1024.0 * 1024.0));
    gt_log_log("popcount table size thereof in MB: %2.3f",
               gt_popcount_tab_calculate_size(15U) / (1024.0 * 1024.0));
    had_err = gt_compressed_bitsequence_write(cbs, gt_str_get(filename), err);
  }
  if (!had_err)
  {
    read_cbs =
      gt_compressed_bitsequence_new_from_file(gt_str_get(filename), err);
    if (read_cbs == NULL)
      had_err = -1;
  }
  if (!had_err && bits != NULL && arguments->check_consistency) {
    for (idx = 0; (unsigned long long) idx < num_of_bits; ++idx) {
      int GT_UNUSED bit = gt_compressed_bitsequence_access(read_cbs, idx);
      int GT_UNUSED original = GT_ISIBITSET(bits, idx) ? 1 : 0;
      gt_assert(gt_compressed_bitsequence_access(cbs, idx) == bit);
      gt_assert(original == bit);
    }
  }
  gt_compressed_bitsequence_delete(cbs);
  gt_compressed_bitsequence_delete(read_cbs);
  gt_free(bits);
  gt_str_delete(filename);
  return had_err;
}