/* Unit test for the string matching routines.
 *
 * Step 1: every matcher (brute force, bmh, kmp, shift-and) is run with an
 * empty pattern against a fixed text; none of them may store a match.
 *
 * Step 2: for STRING_MATCHING_NUM_OF_TESTS rounds a random text and a random
 * pattern are generated. The results of gt_string_matching_brute_force() are
 * compared with the results of the other three matchers, once for the single
 * value collected through store_first_match and once for the arrays collected
 * through store_match.
 *
 * Returns had_err, which starts out as 0 and is handed to every ensure()
 * check. */
int gt_string_matching_unit_test(GtError *err)
{
  char text_buf[STRING_MATCHING_MAX_STRING_LENGTH+1],
       pattern_buf[STRING_MATCHING_MAX_PATTERN_LENGTH+1],
       *fixed_text = "foo";
  GtArray *brute_force_matches, *bmh_matches, *kmp_matches, *shift_and_matches;
  unsigned long brute_force_match, bmh_match, kmp_match, shift_and_match,
                round;
  size_t fixed_text_len;
  int had_err = 0;
  gt_error_check(err);

  /* one result array per algorithm */
  brute_force_matches = gt_array_new(sizeof (unsigned long));
  bmh_matches = gt_array_new(sizeof (unsigned long));
  kmp_matches = gt_array_new(sizeof (unsigned long));
  shift_and_matches = gt_array_new(sizeof (unsigned long));

  /* the empty pattern must not produce any match */
  fixed_text_len = strlen(fixed_text);
  gt_string_matching_brute_force(fixed_text, fixed_text_len, "", 0,
                                 store_match, brute_force_matches);
  gt_string_matching_bmh(fixed_text, fixed_text_len, "", 0, store_match,
                         bmh_matches);
  gt_string_matching_kmp(fixed_text, fixed_text_len, "", 0, store_match,
                         kmp_matches);
  gt_string_matching_shift_and(fixed_text, fixed_text_len, "", 0, store_match,
                               shift_and_matches);
  ensure(had_err, !gt_array_size(brute_force_matches));
  ensure(had_err, !gt_array_size(bmh_matches));
  ensure(had_err, !gt_array_size(kmp_matches));
  ensure(had_err, !gt_array_size(shift_and_matches));

  for (round = 0; round < STRING_MATCHING_NUM_OF_TESTS; round++) {
    unsigned long pos, text_len, pattern_len;

    /* stop at the first failed check */
    if (had_err)
      break;

    /* draw both lengths first, then the text, then the pattern */
    text_len = gt_rand_max(STRING_MATCHING_MAX_STRING_LENGTH);
    pattern_len = gt_rand_max(STRING_MATCHING_MAX_PATTERN_LENGTH);
    pos = 0;
    while (pos < text_len)
      text_buf[pos++] = gt_rand_char();
    text_buf[text_len] = '\0';
    pos = 0;
    while (pos < pattern_len)
      pattern_buf[pos++] = gt_rand_char();
    pattern_buf[pattern_len] = '\0';

    /* first match only: each algorithm writes into its own variable, which
       is preset to GT_UNDEF_ULONG */
    brute_force_match = GT_UNDEF_ULONG;
    bmh_match = GT_UNDEF_ULONG;
    kmp_match = GT_UNDEF_ULONG;
    shift_and_match = GT_UNDEF_ULONG;
    gt_string_matching_brute_force(text_buf, text_len, pattern_buf,
                                   pattern_len, store_first_match,
                                   &brute_force_match);
    gt_string_matching_bmh(text_buf, text_len, pattern_buf, pattern_len,
                           store_first_match, &bmh_match);
    gt_string_matching_kmp(text_buf, text_len, pattern_buf, pattern_len,
                           store_first_match, &kmp_match);
    gt_string_matching_shift_and(text_buf, text_len, pattern_buf, pattern_len,
                                 store_first_match, &shift_and_match);

    /* brute force serves as the reference result */
    ensure(had_err, brute_force_match == bmh_match);
    ensure(had_err, brute_force_match == kmp_match);
    ensure(had_err, brute_force_match == shift_and_match);

    /* all matches: each algorithm appends to its own array */
    gt_string_matching_brute_force(text_buf, text_len, pattern_buf,
                                   pattern_len, store_match,
                                   brute_force_matches);
    gt_string_matching_bmh(text_buf, text_len, pattern_buf, pattern_len,
                           store_match, bmh_matches);
    gt_string_matching_kmp(text_buf, text_len, pattern_buf, pattern_len,
                           store_match, kmp_matches);
    gt_string_matching_shift_and(text_buf, text_len, pattern_buf, pattern_len,
                                 store_match, shift_and_matches);

    /* same number of matches ... */
    ensure(had_err, gt_array_size(brute_force_matches) ==
                    gt_array_size(bmh_matches));
    ensure(had_err, gt_array_size(brute_force_matches) ==
                    gt_array_size(kmp_matches));
    ensure(had_err, gt_array_size(brute_force_matches) ==
                    gt_array_size(shift_and_matches));
    /* ... and the same array contents */
    ensure(had_err, !gt_array_cmp(brute_force_matches, bmh_matches));
    ensure(had_err, !gt_array_cmp(brute_force_matches, kmp_matches));
    ensure(had_err, !gt_array_cmp(brute_force_matches, shift_and_matches));

    /* empty the arrays for the next round */
    gt_array_reset(brute_force_matches);
    gt_array_reset(bmh_matches);
    gt_array_reset(kmp_matches);
    gt_array_reset(shift_and_matches);
  }

  gt_array_delete(shift_and_matches);
  gt_array_delete(bmh_matches);
  gt_array_delete(kmp_matches);
  gt_array_delete(brute_force_matches);

  return had_err;
}
/* Unit test for the interval tree.
 *
 * Part 1 (tree created with gt_free_func, node data are GtRange objects):
 *   - insert num_testranges random ranges and check the tree size,
 *   - run num_samples random "find first overlapping" queries; a returned
 *     node must overlap the query, and if nothing is returned a linear scan
 *     over all inserted ranges must not find an overlap either,
 *   - run num_find_all_samples random "find all overlapping" queries and
 *     compare the (sorted) result with a (sorted) reference list built by a
 *     linear scan.
 *
 * Part 2 (tree created without free function, node data is the insertion
 * index cast to a pointer):
 *   - insert num_testranges random intervals,
 *   - repeatedly remove a randomly chosen node and check that the tree size
 *     and the list of collected nodes shrink by one and that no remaining
 *     node carries the data value of the removed node.
 *
 * Returns had_err, which starts out as 0 and is handed to every gt_ensure()
 * check. */
int gt_interval_tree_unit_test(GT_UNUSED GtError *err)
{
  GtIntervalTree *it = NULL;
  unsigned long i = 0;
  int had_err = 0;
  /* unsigned long, so that the comparisons with counters and container sizes
     below do not mix signed and unsigned operands */
  const unsigned long num_testranges = 3000,
                      num_samples = 300000,
                      num_find_all_samples = 10000,
                      gt_range_max_basepos = 90000,
                      width = 700,
                      query_width = 5000;
  GtRange *res_rng = NULL, qrange;
  GtArray *arr = NULL, *narr = NULL;

  arr = gt_array_new(sizeof (GtRange*));

  /* generate test ranges */
  for (i = 0; i < num_testranges; i++) {
    unsigned long start;
    GtRange *rng;
    rng = gt_calloc(1, sizeof (GtRange));
    start = gt_rand_max(gt_range_max_basepos);
    rng->start = start;
    rng->end = start + gt_rand_max(width);
    gt_array_add(arr, rng);
  }

  /* the tree is created with gt_free_func; arr only keeps a second copy of
     the range pointers for the linear reference scans below */
  it = gt_interval_tree_new(gt_free_func);

  /* insert ranges */
  for (i = 0; i < num_testranges && !had_err; i++) {
    GtIntervalTreeNode *new_node;
    GtRange *rng;
    rng = *(GtRange**) gt_array_get(arr, i);
    new_node = gt_interval_tree_node_new(rng, rng->start, rng->end);
    gt_interval_tree_insert(it, new_node);
  }
  gt_ensure(had_err, gt_interval_tree_size(it) == num_testranges);

  /* perform test queries */
  for (i = 0; i < num_samples && !had_err; i++) {
    /* the node pointer lives in this loop only, so it cannot be confused
       with the result array of the find-all test further down */
    GtIntervalTreeNode *hit;
    unsigned long start = gt_rand_max(gt_range_max_basepos);
    qrange.start = start;
    qrange.end = start + gt_rand_max(width);
    hit = gt_interval_tree_find_first_overlapping(it, qrange.start,
                                                  qrange.end);
    if (hit) {
      /* we have a hit, check if really overlapping */
      res_rng = (GtRange*) gt_interval_tree_node_get_data(hit);
      gt_ensure(had_err, gt_range_overlap(&qrange, res_rng));
    } else {
      /* no hit, check whether there really is no overlapping
         interval in tree */
      GtRange *this_rng;
      unsigned long j;
      bool found = false;
      for (j = 0; j < gt_array_size(arr); j++) {
        this_rng = *(GtRange**) gt_array_get(arr, j);
        if (gt_range_overlap(this_rng, &qrange)) {
          found = true;
          break;
        }
      }
      gt_ensure(had_err, !found);
    }
  }

  /* test searching for all overlapping intervals */
  for (i = 0; i < num_find_all_samples && !had_err; i++) {
    GtArray *res, *ref;
    unsigned long j, start = gt_rand_max(gt_range_max_basepos);
    qrange.start = start;
    qrange.end = start + gt_rand_max(query_width);
    /* res is used right away by the query, so there is no point in testing
       it for NULL afterwards */
    res = gt_array_new(sizeof (GtRange*));
    gt_interval_tree_find_all_overlapping(it, qrange.start, qrange.end, res);

    /* generate reference overlapping interval list by linear search */
    ref = gt_array_new(sizeof (GtRange*));
    for (j = 0; j < gt_array_size(arr); j++) {
      GtRange *this_rng;
      this_rng = *(GtRange**) gt_array_get(arr, j);
      if (gt_range_overlap(this_rng, &qrange)) {
        gt_array_add(ref, this_rng);
      }
    }

    /* compare reference with interval tree query result; both lists are
       sorted with the same comparator first */
    gt_array_sort_stable(ref, range_ptr_compare);
    gt_array_sort_stable(res, range_ptr_compare);
    /* must be equal */
    gt_ensure(had_err, gt_array_cmp(ref, res)==0);

    gt_array_delete(ref);
    gt_array_delete(res);
  }
  gt_interval_tree_delete(it);

  /* second tree: created without a free function, node data is just the
     insertion index */
  it = gt_interval_tree_new(NULL);
  gt_array_reset(arr);

  /* generate test ranges */
  for (i = 0; i < num_testranges && !had_err; i++) {
    unsigned long start;
    GtIntervalTreeNode *new_node;
    start = gt_rand_max(gt_range_max_basepos);
    new_node = gt_interval_tree_node_new((void*) i, start,
                                         start + gt_rand_max(width));
    gt_interval_tree_insert(it, new_node);
  }
  gt_ensure(had_err, gt_interval_tree_size(it) == num_testranges);

  narr = gt_array_new(sizeof (GtIntervalTreeNode*));
  for (i = 0; i < num_testranges && !had_err; i++) {
    unsigned long idx, n, val;
    GtIntervalTreeNode *node = NULL;

    /* narr still holds the nodes collected at the end of the previous
       iteration; clear it so that every node is listed exactly once */
    gt_array_reset(narr);
    /* get all nodes referenced by the interval tree */
    interval_tree_find_all_internal(it, it->root, itree_test_get_node, 0,
                                    gt_range_max_basepos+width, narr);

    /* remove a random node */
    idx = gt_rand_max(gt_array_size(narr)-1);
    node = *(GtIntervalTreeNode**) gt_array_get(narr, idx);
    gt_ensure(had_err, node != NULL);
    /* remember the data value before the node is removed */
    val = (unsigned long) gt_interval_tree_node_get_data(node);
    gt_interval_tree_remove(it, node);
    gt_array_reset(narr);

    /* make sure that the node has disappeared */
    gt_ensure(had_err, gt_interval_tree_size(it) == num_testranges - (i+1));
    interval_tree_find_all_internal(it, it->root, itree_test_get_node, 0,
                                    gt_range_max_basepos+width, narr);
    gt_ensure(had_err, gt_array_size(narr) == num_testranges - (i+1));
    for (n = 0; !had_err && n < gt_array_size(narr); n++) {
      GtIntervalTreeNode *onode = *(GtIntervalTreeNode**) gt_array_get(narr, n);
      gt_ensure(had_err, (unsigned long) gt_interval_tree_node_get_data(onode)
                           != val);
    }
  }

  gt_array_delete(arr);
  gt_array_delete(narr);
  gt_interval_tree_delete(it);
  return had_err;
}