/*
 * Process a single <match>: count it, flush the matches collected so far into
 * chains when the reference sequence changes, and append it to info->matches
 * unless it equals the most recently stored match.
 *
 * If info->matchnumcounter is set, the per-reference counter is incremented
 * first; once it exceeds a positive info->maxnumofmatches the match is
 * dropped. Always returns 0.
 */
int gth_match_processor(GthMatchProcessorInfo *info, GthSeqCon *gen_seq_con,
                        GthSeqCon *ref_seq_con, GthMatch *match)
{
  /* per-reference match limit */
  if (info->matchnumcounter) {
    info->matchnumcounter[match->Storeseqnumreference] += 1;
    if (info->maxnumofmatches > 0 &&
        info->matchnumcounter[match->Storeseqnumreference] >
        info->maxnumofmatches) {
      /* limit exceeded: discard this match */
      return 0;
    }
  }

  /* a new reference sequence starts: chain what has been collected so far
     (skipped when the reference is an index and we are not online) */
  if (!info->refseqisindex || info->online) {
    if (match->Storeseqnumreference != info->lastrefseqnum &&
        gt_array_size(info->matches) > 0) {
      gt_assert(info->chain_collection && info->chaining_info);

      calc_chains_from_matches(info->chain_collection, info->matches,
                               info->chaining_info, gen_seq_con, ref_seq_con,
                               info->rare, info->fragweightfactor,
                               info->jump_table_new,
                               info->jump_table_new_reverse,
                               info->jump_table_delete);

      /* the chained matches are no longer needed */
      gt_array_reset(info->matches);
    }
  }

  /* store the match only if it differs from the last stored one */
  if (gt_array_size(info->matches) == 0 ||
      !gth_matches_are_equal(gt_array_get_last(info->matches), match)) {
    gt_array_add_elem(info->matches, match, sizeof *match);

    /* remember the reference sequence number of the stored match */
    info->lastrefseqnum = match->Storeseqnumreference;
  }

  return 0;
}
/*
 * Callback taking (key, value, data, err): <value> is an
 * AutomaticSequenceRegion and <data> a GtQueue of genome nodes.
 *
 * If the region has feature nodes, its sequence region node is queued first
 * (and the pointer in the region is cleared), followed by all feature nodes
 * in array order; the feature node array is then reset. Regions without
 * feature nodes are left untouched. Always returns 0.
 */
static int add_auto_sr_to_queue(GT_UNUSED void *key, void *value, void *data,
                                GT_UNUSED GtError *err)
{
  AutomaticSequenceRegion *region = value;
  GtQueue *queue = data;
  unsigned int idx;

  gt_error_check(err);
  gt_assert(key && value && data);

  /* nothing to emit for regions without features */
  if (!gt_array_size(region->feature_nodes))
    return 0;

  /* hand the sequence region node over to the queue */
  gt_queue_add(queue, region->sequence_region);
  region->sequence_region = NULL;

  /* then all collected feature nodes, preserving their order */
  for (idx = 0; idx < gt_array_size(region->feature_nodes); idx++) {
    GtGenomeNode *feature =
      *(GtGenomeNode**) gt_array_get(region->feature_nodes, idx);
    gt_queue_add(queue, feature);
  }
  gt_array_reset(region->feature_nodes);

  return 0;
}
/*
 * Node stream "next" implementation of the load stream.
 *
 * On the first call the complete input stream is read into
 * load_stream->nodes (EOF nodes are deleted instead of stored). Afterwards
 * each call hands out the next buffered node via <gn>. Once all nodes have
 * been handed out the buffer is reset and <*gn> is set to NULL.
 *
 * Returns 0 on success; if reading the input stream fails, its error code is
 * returned and <*gn> is not written.
 */
static int gt_load_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                               GtError *err)
{
  GtLoadStream *ls;
  int had_err = 0;

  gt_error_check(err);
  ls = gt_load_stream_cast(ns);

  /* first call: slurp the whole input stream */
  if (!ls->full) {
    GtGenomeNode *node, *eofn;

    for (;;) {
      had_err = gt_node_stream_next(ls->in_stream, &node, err);
      if (had_err || !node)
        break;
      eofn = gt_eof_node_try_cast(node);
      if (eofn)
        gt_genome_node_delete(eofn); /* get rid of EOF nodes */
      else
        gt_array_add(ls->nodes, node);
    }

    if (had_err)
      return had_err;
    ls->full = true;
  }

  gt_assert(ls->full);

  /* hand out the next buffered node, if any is left */
  if (ls->idx < gt_array_size(ls->nodes)) {
    *gn = *(GtGenomeNode**) gt_array_get(ls->nodes, ls->idx);
    ls->idx++;
    return 0;
  }

  /* buffer exhausted: signal the end of the stream */
  gt_array_reset(ls->nodes);
  *gn = NULL;
  return had_err;
}
/*
 * Unit test for the range helpers: gt_ranges_uniq(),
 * gt_ranges_uniq_in_place(), gt_ranges_uniq_count(),
 * gt_ranges_uniq_in_place_count() and gt_range_reorder().
 *
 * <ranges_in> is a list of ranges containing repeats, <ranges_out> the
 * expected result after removing repeats and <counts> the expected number of
 * occurrences of every entry of <ranges_out> in <ranges_in>.
 *
 * Returns 0 if all checks pass; otherwise gt_ensure() sets <had_err> (and
 * <err>) and the non-zero value is returned.
 */
int gt_range_unit_test(GtError *err)
{
  static GtRange ranges_in[] = {
    { 620432, 620536 }, { 620432, 620536 }, { 620957, 621056 },
    { 620957, 621056 }, { 625234, 625253 }, { 625500, 625655 },
    { 625533, 625655 }, { 625533, 625655 }, { 627618, 627729 },
    { 627618, 627729 }, { 627618, 627729 }, { 662083, 662194 },
    { 662083, 662194 }, { 662083, 662194 }, { 663032, 663166 },
    { 663032, 663166 }, { 663032, 663166 }, { 664782, 664906 },
    { 664782, 664906 }, { 664782, 664906 }, { 665748, 665823 },
    { 665748, 665823 }, { 665748, 665823 }, { 666825, 666881 },
    { 666825, 666881 }, { 667797, 667954 }, { 667845, 667954 },
    { 667845, 667954 }, { 679175, 679280 }, { 679175, 679280 },
    { 679175, 679280 }, { 680427, 680540 }, { 680427, 680540 },
    { 680427, 680540 }, { 684144, 684293 }, { 684144, 684293 },
    { 684144, 684293 }, { 724903, 724985 }, { 724903, 724985 },
    { 727099, 727325 }, { 727099, 727325 }, { 732544, 732821 },
    { 732544, 732821 }, { 750016, 750280 }, { 750016, 750280 },
    { 769508, 769734 }, { 769508, 769734 }
  },
  ranges_out[] = {
    { 620432, 620536 }, { 620957, 621056 }, { 625234, 625253 },
    { 625500, 625655 }, { 625533, 625655 }, { 627618, 627729 },
    { 662083, 662194 }, { 663032, 663166 }, { 664782, 664906 },
    { 665748, 665823 }, { 666825, 666881 }, { 667797, 667954 },
    { 667845, 667954 }, { 679175, 679280 }, { 680427, 680540 },
    { 684144, 684293 }, { 724903, 724985 }, { 727099, 727325 },
    { 732544, 732821 }, { 750016, 750280 }, { 769508, 769734 }
  };
  GtUword counts[] = {
    2, 2, 1, 1, 2, 3, 3, 3, 3, 3, 2, 1, 2, 3, 3, 3, 2, 2, 2, 2, 2
  };
  /* element counts of the fixture tables */
  const GtUword num_in = sizeof (ranges_in) / sizeof (ranges_in[0]),
                num_out = sizeof (ranges_out) / sizeof (ranges_out[0]),
                num_counts = sizeof (counts) / sizeof (counts[0]);
  GtArray *ranges, *tmp_ranges, *ctr;
  GtUword i;
  int had_err = 0;
  gt_error_check(err);

  /* the fixture itself must be consistent */
  gt_ensure(num_out == num_counts);

  /* test gt_ranges_uniq() */
  ranges = gt_array_new(sizeof (GtRange));
  tmp_ranges = gt_array_new(sizeof (GtRange));
  for (i = 0; i < num_in && !had_err; i++)
    gt_array_add(ranges, ranges_in[i]);
  gt_ranges_uniq(tmp_ranges, ranges);
  /* the input array must be left untouched */
  gt_ensure(gt_array_size(ranges) == num_in);
  gt_ensure(gt_array_size(tmp_ranges) == num_out);
  for (i = 0; i < gt_array_size(tmp_ranges) && !had_err; i++) {
    gt_ensure(ranges_out[i].start ==
              (*(GtRange*) gt_array_get(tmp_ranges, i)).start);
    gt_ensure(ranges_out[i].end ==
              (*(GtRange*) gt_array_get(tmp_ranges, i)).end);
  }

  /* test gt_ranges_uniq_in_place() */
  gt_array_reset(tmp_ranges);
  gt_array_add_array(tmp_ranges, ranges);
  gt_ranges_uniq_in_place(tmp_ranges);
  /* check the size first: without it a result of the wrong length would go
     unnoticed and the loop below could index past the end of ranges_out */
  gt_ensure(gt_array_size(tmp_ranges) == num_out);
  for (i = 0; i < gt_array_size(tmp_ranges) && !had_err; i++) {
    gt_ensure(ranges_out[i].start ==
              (*(GtRange*) gt_array_get(tmp_ranges, i)).start);
    gt_ensure(ranges_out[i].end ==
              (*(GtRange*) gt_array_get(tmp_ranges, i)).end);
  }

  /* test gt_ranges_uniq_count() */
  gt_array_reset(tmp_ranges);
  ctr = gt_ranges_uniq_count(tmp_ranges, ranges);
  gt_ensure(gt_array_size(tmp_ranges) == gt_array_size(ctr));
  gt_ensure(gt_array_size(ctr) == num_counts);
  for (i = 0; i < gt_array_size(ctr) && !had_err; i++) {
    gt_ensure(counts[i] == *(GtUword*) gt_array_get(ctr, i));
    gt_ensure(ranges_out[i].start ==
              (*(GtRange*) gt_array_get(tmp_ranges, i)).start);
    gt_ensure(ranges_out[i].end ==
              (*(GtRange*) gt_array_get(tmp_ranges, i)).end);
  }
  gt_array_delete(ctr);

  /* test gt_ranges_uniq_in_place_count() */
  ctr = gt_ranges_uniq_in_place_count(ranges);
  gt_ensure(gt_array_size(ranges) == gt_array_size(ctr));
  gt_ensure(gt_array_size(ctr) == num_counts);
  for (i = 0; i < gt_array_size(ctr) && !had_err; i++) {
    gt_ensure(counts[i] == *(GtUword*) gt_array_get(ctr, i));
    gt_ensure(ranges_out[i].start ==
              (*(GtRange*) gt_array_get(ranges, i)).start);
    gt_ensure(ranges_out[i].end ==
              (*(GtRange*) gt_array_get(ranges, i)).end);
  }
  gt_array_delete(ctr);

  /* test gt_range_reorder() */
  if (!had_err) {
    GtRange range = { 1, 100 };
    /* an ordered range stays as it is */
    range = gt_range_reorder(range);
    gt_ensure(range.start == 1 && range.end == 100);
    /* a reversed range gets its borders swapped */
    range.start = 100;
    range.end = 1;
    range = gt_range_reorder(range);
    gt_ensure(range.start == 1 && range.end == 100);
  }

  /* free */
  gt_array_delete(ranges);
  gt_array_delete(tmp_ranges);
  return had_err;
}
/*
 * Decode the next read from <seq_dec>.
 *
 * If <seq> and/or <qual> are not NULL, the decoded bases (upper case) and/or
 * quality characters are written to them, followed by a terminating '\0';
 * the buffers must be large enough for the read plus the terminator.
 * The read counter of <seq_dec> is advanced whenever a read was attempted,
 * also if decoding failed.
 *
 * Returns 1 (SUCCESS) if a read was decoded, 0 (END) if
 * seq_dec->cur_read is already beyond seq_dec->num_of_reads, and
 * -1 (HCR_ERROR) if the Huffman decoder reported a problem; in the error
 * case <err> is set by the decoder or to "reached end of file".
 */
static int hcr_next_seq_qual(GtHcrSeqDecoder *seq_dec, char *seq, char *qual,
                             GtError *err)
{
  enum state {
    HCR_ERROR = -1,
    END,
    SUCCESS
  };
  unsigned char base;
  GtUword i,
          num_symbols,
          nearestsample,
          *symbol;
  size_t startofnearestsample = 0;
  enum state status = END;
  FastqFileInfo cur_read;
  FastqFileInfo *fileinfo = NULL;

  if (seq_dec->cur_read <= seq_dec->num_of_reads) {
    status = SUCCESS;

    /* (re)use the symbol buffer */
    if (seq_dec->symbols == NULL)
      seq_dec->symbols = gt_array_new(sizeof (GtUword));
    else
      gt_array_reset(seq_dec->symbols);

    /* look up the file info entry that provides the length of this read */
    cur_read.readnum = seq_dec->cur_read;
    gt_log_log("cur_read: "GT_WU"",seq_dec->cur_read);
    fileinfo = (FastqFileInfo *)gt_rbtree_next_key(seq_dec->file_info_rbt,
                                                   &cur_read,
                                                   hcr_cmp_FastqFileInfo,
                                                   NULL);
    gt_assert(fileinfo);

    /* reset huffman_decoder if next read is sampled */
    if (gt_sampling_get_next_elementnum(seq_dec->sampling) ==
          seq_dec->cur_read) {
      gt_log_log("reset because sampled read is next");
      (void) gt_sampling_get_next_sample(seq_dec->sampling,
                                         &nearestsample,
                                         &startofnearestsample);
      reset_data_iterator_to_pos(seq_dec->data_iter, startofnearestsample);
      (void) gt_huffman_decoder_get_new_mem_chunk(seq_dec->huff_dec, err);
      if (gt_error_is_set(err))
        status = HCR_ERROR;
    }

    if (status != HCR_ERROR) {
      int ret;
      ret = gt_huffman_decoder_next(seq_dec->huff_dec, seq_dec->symbols,
                                    fileinfo->readlength, err);
      /* anything but 1 is a failure; 0 additionally needs an error message */
      if (ret != 1)
        status = HCR_ERROR;
      if (ret == 0)
        gt_error_set(err, "reached end of file");
    }

    if (qual || seq) {
      gt_log_log("set strings");
      num_symbols = gt_array_size(seq_dec->symbols);
      for (i = 0; i < num_symbols; i++) {
        symbol = (GtUword*) gt_array_get(seq_dec->symbols, i);
        if (qual != NULL)
          qual[i] = get_qual_from_symbol(seq_dec, *symbol);
        if (seq != NULL) {
          base = get_base_from_symbol(seq_dec, *symbol);
          /* <ctype.h> functions require a value representable as
             unsigned char (or EOF); passing a negative char is undefined */
          seq[i] = (char) toupper((unsigned char)
                                  gt_alphabet_decode(seq_dec->alpha,
                                                     (GtUchar) base));
        }
      }
      if (qual != NULL)
        qual[num_symbols] = '\0';
      if (seq != NULL)
        seq[num_symbols] = '\0';
    }
    seq_dec->cur_read++;
  }
  return (int) status;
}
/*
 * Compute the splice forms of <csa> and report each of them through
 * csa->process_splice_form (if that callback is set).
 *
 * One bit table per spliced alignment is computed for the sets C, left,
 * right, L and R. Starting with U_0 = all spliced alignments, each round
 * scans the members of U_i, builds L[sa] | R[sa] for each and keeps the
 * candidate set with the most bits set (the first one wins on ties). That
 * set is reported as splice form SA_i and removed from U_i; the rounds stop
 * when U_i is empty.
 */
static void compute_csas(ConsensusSA *csa)
{
  unsigned long k, best_sa, best_size = 0, cand, cand_size;
  GtArray *splice_form;
  GtBittab **C, **left, **right, **L, **R, *U_i, *SA_i, *SA_prime;
#ifndef NDEBUG
  unsigned long u_i_size, u_i_minus_1_size;
  gt_assert(csa && csa->set_of_sas);
#endif

  /* init sets */
  C     = gt_malloc(sizeof (GtBittab*) * csa->number_of_sas);
  left  = gt_malloc(sizeof (GtBittab*) * csa->number_of_sas);
  right = gt_malloc(sizeof (GtBittab*) * csa->number_of_sas);
  L     = gt_malloc(sizeof (GtBittab*) * csa->number_of_sas);
  R     = gt_malloc(sizeof (GtBittab*) * csa->number_of_sas);

  for (k = 0; k < csa->number_of_sas; k++) {
    C[k]     = gt_bittab_new(csa->number_of_sas);
    left[k]  = gt_bittab_new(csa->number_of_sas);
    right[k] = gt_bittab_new(csa->number_of_sas);
    L[k]     = gt_bittab_new(csa->number_of_sas);
    R[k]     = gt_bittab_new(csa->number_of_sas);
  }

  U_i      = gt_bittab_new(csa->number_of_sas);
  SA_i     = gt_bittab_new(csa->number_of_sas);
  SA_prime = gt_bittab_new(csa->number_of_sas);

  splice_form = gt_array_new(sizeof (unsigned long));

  /* compute sets */
  compute_C(C, csa);
  compute_left(left, csa);
  compute_right(right, csa);
  compute_L(L, C, left, csa->number_of_sas);
  compute_R(R, C, right, csa->number_of_sas);

  /* U_0 = SA */
  for (k = 0; k < csa->number_of_sas; k++)
    gt_bittab_set_bit(U_i, k);

#ifndef NDEBUG
  /* preparation for assertion below */
  u_i_minus_1_size = gt_bittab_count_set_bits(U_i);
#endif

  while (gt_bittab_is_true(U_i)) {
    /* pick the member of U_i with the largest L | R set */
    best_sa = GT_UNDEF_ULONG;
    for (cand = gt_bittab_get_first_bitnum(U_i);
         cand != gt_bittab_get_last_bitnum(U_i);
         cand = gt_bittab_get_next_bitnum(U_i, cand)) {
      if (best_sa == GT_UNDEF_ULONG) {
        /* the first candidate initializes the current best */
        best_sa = cand;
        gt_bittab_or(SA_i, L[best_sa], R[best_sa]);
        best_size = gt_bittab_count_set_bits(SA_i);
        continue;
      }

      gt_bittab_or(SA_prime, L[cand], R[cand]);
      cand_size = gt_bittab_count_set_bits(SA_prime);
      if (cand_size <= best_size)
        continue; /* not strictly larger: keep the current best */

      best_sa = cand;
      best_size = cand_size;
      /* presumably assigns SA_prime to SA_i -- confirm against the
         GtBittab API */
      gt_bittab_equal(SA_i, SA_prime);
    }

    /* make sure the computed splice form is maximal w.r.t. to compatibility */
    gt_assert(splice_form_is_valid(SA_i, csa));

    /* process splice form */
    if (csa->process_splice_form) {
      gt_array_reset(splice_form);
      gt_bittab_get_all_bitnums(SA_i, splice_form);
      csa->process_splice_form(splice_form, csa->set_of_sas,
                               csa->number_of_sas, csa->size_of_sa,
                               csa->userdata);
    }

    /* U_i = U_i-1 \ SA_i */
    gt_bittab_nand(U_i, U_i, SA_i);

#ifndef NDEBUG
    /* ensure that |U_i| < |U_i-1| */
    u_i_size = gt_bittab_count_set_bits(U_i);
    gt_assert(u_i_size < u_i_minus_1_size);
    u_i_minus_1_size = u_i_size;
#endif
  }

  /* free sets */
  for (k = 0; k < csa->number_of_sas; k++) {
    gt_bittab_delete(C[k]);
    gt_bittab_delete(left[k]);
    gt_bittab_delete(right[k]);
    gt_bittab_delete(L[k]);
    gt_bittab_delete(R[k]);
  }
  gt_free(C);
  gt_free(left);
  gt_free(right);
  gt_free(L);
  gt_free(R);

  gt_bittab_delete(U_i);
  gt_bittab_delete(SA_i);
  gt_bittab_delete(SA_prime);

  gt_array_delete(splice_form);
}
/*
 * Unit test for gt_ovlfind_kmp().
 *
 * First runs gt_kmp_preproc_unit_test(); if that fails its error code is
 * returned immediately. Afterwards a series of scenarios is run, each one
 * calling gt_ovlfind_kmp() on a pair of literal sequences <u> and <v>
 * (with tables obtained from gt_kmp_preproc()) and checking
 *   - the returned GtContfind value,
 *   - the number of elements in <a> after the call (ovlfind_kmp_test_save
 *     is passed as callback with <a> as its data, so presumably it stores
 *     one result per reported overlap -- confirm at its definition),
 *   - selected results via GT_OVLFIND_KMP_EXPECT_RESULT.
 *
 * NOTE(review): GT_OVLFIND_KMP_EXPECT_RESULT is defined outside this
 * function; it presumably relies on the locals <a>, <r> and <had_err> by
 * name, so these should not be renamed without checking the macro.
 *
 * Each scenario is skipped once <had_err> is set. Returns <had_err>.
 */
int gt_ovlfind_kmp_unit_test(GtError *err)
{
  int had_err = 0;
  GtArray *a;                      /* collects the results of one call */
  struct GtOvlfindKmpResult *r;
  GtContfind retval;
  gt_kmp_t *u_pi, *v_pi;           /* tables returned by gt_kmp_preproc() */

  /*@i1@*/ gt_error_check(err);

  /* the preprocessing must work before overlaps can be tested */
  had_err = gt_kmp_preproc_unit_test(err);
  if (had_err != 0)
    return had_err;

  a = gt_array_new(sizeof (struct GtOvlfindKmpResult));

  /* u suffix == v prefix */
  if (!had_err)
  {
    gt_array_reset(a);
    u_pi = gt_kmp_preproc("aacgcacctg", 10UL);
    v_pi = gt_kmp_preproc("acctgatttc", 10UL);
    retval = gt_ovlfind_kmp("aacgcacctg", 10UL, u_pi, "acctgatttc", 10UL, v_pi,
        GT_OVLFIND_PROPER_SPM, 1UL, false, ovlfind_kmp_test_save, a);
    gt_ensure(had_err, retval == GT_CONTFIND_NO);
    gt_ensure(had_err, gt_array_size(a) == 1UL);
    GT_OVLFIND_KMP_EXPECT_RESULT(0UL, true, 5UL);
    gt_free(u_pi);
    gt_free(v_pi);
  }

  /* v suffix == u prefix */
  if (!had_err)
  {
    gt_array_reset(a);
    u_pi = gt_kmp_preproc("atccgtgacgtg", 12UL);
    v_pi = gt_kmp_preproc("aagaagaatccg", 12UL);
    retval = gt_ovlfind_kmp("atccgtgacgtg", 12UL, u_pi, "aagaagaatccg", 12UL,
        v_pi, GT_OVLFIND_ALL, 1UL, false, ovlfind_kmp_test_save, a);
    gt_ensure(had_err, retval == GT_CONTFIND_NO);
    gt_ensure(had_err, gt_array_size(a) == 1UL);
    GT_OVLFIND_KMP_EXPECT_RESULT(0UL, false, 5UL);
    gt_free(u_pi);
    gt_free(v_pi);
  }

  /* no overlap */
  if (!had_err)
  {
    gt_array_reset(a);
    u_pi = gt_kmp_preproc("aac", 3UL);
    v_pi = gt_kmp_preproc("tgc", 3UL);
    retval = gt_ovlfind_kmp("aac", 3UL, u_pi, "tgc", 3UL, v_pi,
        GT_OVLFIND_PROPER_SPM, 1UL, false, ovlfind_kmp_test_save, a);
    gt_ensure(had_err, retval == GT_CONTFIND_NO);
    gt_ensure(had_err, gt_array_size(a) == 0UL);
    gt_free(u_pi);
    gt_free(v_pi);
  }

  /* u suffix of v */
  if (!had_err)
  {
    gt_array_reset(a);
    u_pi = gt_kmp_preproc("acagc", 5UL);
    v_pi = gt_kmp_preproc("gtacagc", 7UL);
    /* same sequence pair, run once per mode: SPM, PROPER_SPM, CNT, ALL */
    retval = gt_ovlfind_kmp("acagc", 5UL, u_pi, "gtacagc", 7UL, v_pi,
        GT_OVLFIND_SPM, 1UL, false, ovlfind_kmp_test_save, a);
    gt_ensure(had_err, gt_array_size(a) == 1UL);
    gt_ensure(had_err, retval == GT_CONTFIND_OFF);
    GT_OVLFIND_KMP_EXPECT_RESULT(0UL, false, 5UL);
    gt_array_reset(a);
    retval = gt_ovlfind_kmp("acagc", 5UL, u_pi, "gtacagc", 7UL, v_pi,
        GT_OVLFIND_PROPER_SPM, 1UL, false, ovlfind_kmp_test_save, a);
    gt_ensure(had_err, retval == GT_CONTFIND_U);
    gt_ensure(had_err, gt_array_size(a) == 0UL);
    gt_array_reset(a);
    retval = gt_ovlfind_kmp("acagc", 5UL, u_pi, "gtacagc", 7UL, v_pi,
        GT_OVLFIND_CNT, 1UL, false, ovlfind_kmp_test_save, a);
    gt_ensure(had_err, gt_array_size(a) == 0UL);
    gt_ensure(had_err, retval == GT_CONTFIND_U);
    gt_array_reset(a);
    retval = gt_ovlfind_kmp("acagc", 5UL, u_pi, "gtacagc", 7UL, v_pi,
        GT_OVLFIND_ALL, 1UL, false, ovlfind_kmp_test_save, a);
    gt_ensure(had_err, gt_array_size(a) == 1UL);
    gt_ensure(had_err, retval == GT_CONTFIND_U);
    GT_OVLFIND_KMP_EXPECT_RESULT(0UL, false, 5UL);
    gt_free(u_pi);
    gt_free(v_pi);
  }

  /* v suffix of u */
  if (!had_err)
  {
    gt_array_reset(a);
    u_pi = gt_kmp_preproc("gtacagc", 7UL);
    v_pi = gt_kmp_preproc("acagc", 5UL);
    retval = gt_ovlfind_kmp("gtacagc", 7UL, u_pi, "acagc", 5UL, v_pi,
        GT_OVLFIND_SPM, 1UL, false, ovlfind_kmp_test_save, a);
    gt_ensure(had_err, retval == GT_CONTFIND_OFF);
    gt_ensure(had_err, gt_array_size(a) == 1UL);
    GT_OVLFIND_KMP_EXPECT_RESULT(0UL, true, 5UL);
    gt_array_reset(a);
    retval = gt_ovlfind_kmp("gtacagc", 7UL, u_pi, "acagc", 5UL, v_pi,
        GT_OVLFIND_PROPER_SPM, 1UL, false, ovlfind_kmp_test_save, a);
    gt_ensure(had_err, retval == GT_CONTFIND_V);
    gt_ensure(had_err, gt_array_size(a) == 0UL);
    gt_free(u_pi);
    gt_free(v_pi);
  }

  /* u prefix of v */
  if (!had_err)
  {
    gt_array_reset(a);
    u_pi = gt_kmp_preproc("ctat", 4UL);
    v_pi = gt_kmp_preproc("ctatacagg", 9UL);
    retval = gt_ovlfind_kmp("ctat", 4UL, u_pi, "ctatacagg", 9UL, v_pi,
        GT_OVLFIND_SPM, 1UL, false, ovlfind_kmp_test_save, a);
    gt_ensure(had_err, retval == GT_CONTFIND_OFF);
    gt_ensure(had_err, gt_array_size(a) == 1UL);
    GT_OVLFIND_KMP_EXPECT_RESULT(0UL, true, 4UL);
    gt_array_reset(a);
    retval = gt_ovlfind_kmp("ctat", 4UL, u_pi, "ctatacagg", 9UL, v_pi,
        GT_OVLFIND_PROPER_SPM, 1UL, false, ovlfind_kmp_test_save, a);
    gt_ensure(had_err, retval == GT_CONTFIND_U);
    gt_ensure(had_err, gt_array_size(a) == 0UL);
    gt_free(u_pi);
    gt_free(v_pi);
  }

  /* v prefix of u */
  if (!had_err)
  {
    gt_array_reset(a);
    u_pi = gt_kmp_preproc("ctatacagg", 9UL);
    v_pi = gt_kmp_preproc("ctat", 4UL);
    retval = gt_ovlfind_kmp("ctatacagg", 9UL, u_pi, "ctat", 4UL, v_pi,
        GT_OVLFIND_SPM, 1UL, false, ovlfind_kmp_test_save, a);
    gt_ensure(had_err, retval == GT_CONTFIND_OFF);
    gt_ensure(had_err, gt_array_size(a) == 1UL);
    GT_OVLFIND_KMP_EXPECT_RESULT(0UL, false, 4UL);
    gt_array_reset(a);
    retval = gt_ovlfind_kmp("ctatacagg", 9UL, u_pi, "ctat", 4UL, v_pi,
        GT_OVLFIND_PROPER_SPM, 1UL, false, ovlfind_kmp_test_save, a);
    gt_ensure(had_err, retval == GT_CONTFIND_V);
    gt_ensure(had_err, gt_array_size(a) == 0UL);
    gt_free(u_pi);
    gt_free(v_pi);
  }

  /* identical sequences */
  if (!had_err)
  {
    gt_array_reset(a);
    /* u and v are the same sequence, so a single table is used for both */
    u_pi = gt_kmp_preproc("acagc", 5UL);
    retval = gt_ovlfind_kmp("acagc", 5UL, u_pi, "acagc", 5UL, u_pi,
        GT_OVLFIND_SPM, 1UL, false, ovlfind_kmp_test_save, a);
    gt_ensure(had_err, retval == GT_CONTFIND_OFF);
    gt_ensure(had_err, gt_array_size(a) == 2UL);
    GT_OVLFIND_KMP_EXPECT_RESULT(0UL, true, 5UL);
    GT_OVLFIND_KMP_EXPECT_RESULT(1UL, false, 5UL);
    gt_array_reset(a);
    retval = gt_ovlfind_kmp("acagc", 5UL, u_pi, "acagc", 5UL, u_pi,
        GT_OVLFIND_PROPER_SPM, 1UL, false, ovlfind_kmp_test_save, a);
    gt_ensure(had_err, retval == GT_CONTFIND_EQ);
    gt_ensure(had_err, gt_array_size(a) == 0UL);
    gt_free(u_pi);
  }

  /* find_nonmaximal */
  if (!had_err)
  {
    gt_array_reset(a);
    u_pi = gt_kmp_preproc("aacagtagtagt", 12UL);
    v_pi = gt_kmp_preproc("agtagtagttaa", 12UL);
    /* boolean argument false: 2 results are expected */
    retval = gt_ovlfind_kmp("aacagtagtagt", 12UL, u_pi,
        "agtagtagttaa", 12UL, v_pi, GT_OVLFIND_SPM, 1UL, false,
        ovlfind_kmp_test_save, a);
    gt_ensure(had_err, retval == GT_CONTFIND_OFF);
    gt_ensure(had_err, gt_array_size(a) == 2UL);
    GT_OVLFIND_KMP_EXPECT_RESULT(0UL, true, 9UL);
    GT_OVLFIND_KMP_EXPECT_RESULT(1UL, false, 2UL);
    gt_array_reset(a);
    /* boolean argument true: 5 results are expected */
    retval = gt_ovlfind_kmp("aacagtagtagt", 12UL, u_pi,
        "agtagtagttaa", 12UL, v_pi, GT_OVLFIND_SPM, 1UL, true,
        ovlfind_kmp_test_save, a);
    gt_ensure(had_err, gt_array_size(a) == 5UL);
    gt_ensure(had_err, retval == GT_CONTFIND_OFF);
    gt_free(u_pi);
    gt_free(v_pi);
  }

  /* min_length */
  if (!had_err)
  {
    gt_array_reset(a);
    u_pi = gt_kmp_preproc("aggaccagtagt", 12UL);
    v_pi = gt_kmp_preproc("agtagttactac", 12UL);
    /* numeric argument 1UL: 2 results are expected */
    retval = gt_ovlfind_kmp("aggaccagtagt", 12UL, u_pi,
        "agtagttactac", 12UL, v_pi, GT_OVLFIND_SPM, 1UL, true,
        ovlfind_kmp_test_save, a);
    gt_ensure(had_err, retval == GT_CONTFIND_OFF);
    gt_ensure(had_err, gt_array_size(a) == 2UL);
    gt_array_reset(a);
    /* numeric argument 4UL (presumably the minimal overlap length):
       only 1 result is expected */
    retval = gt_ovlfind_kmp("aggaccagtagt", 12UL, u_pi,
        "agtagttactac", 12UL, v_pi, GT_OVLFIND_SPM, 4UL, true,
        ovlfind_kmp_test_save, a);
    gt_ensure(had_err, gt_array_size(a) == 1UL);
    gt_ensure(had_err, retval == GT_CONTFIND_OFF);
    gt_free(u_pi);
    gt_free(v_pi);
  }

  gt_array_delete(a);
  return had_err;
}
/*
 * Unit test for the string matching algorithms (brute force, BMH, KMP and
 * shift-and).
 *
 * First the empty pattern is matched against a fixed text; none of the
 * algorithms may report a match. Then STRING_MATCHING_NUM_OF_TESTS rounds
 * are run on a random text and a random pattern. In every round the first
 * match and the complete list of matches reported by the brute force
 * implementation are compared with the results of the other three
 * algorithms.
 *
 * Returns 0 on success, the value set by ensure() otherwise.
 */
int gt_string_matching_unit_test(GtError *err)
{
  char subject[STRING_MATCHING_MAX_STRING_LENGTH+1],
       pattern[STRING_MATCHING_MAX_PATTERN_LENGTH+1];
  char *text = "foo";
  size_t text_len;
  GtArray *brute_force_matches,
          *bmh_matches,
          *kmp_matches,
          *shift_and_matches;
  unsigned long round,
                brute_force_match,
                bmh_match,
                kmp_match,
                shift_and_match;
  int had_err = 0;

  gt_error_check(err);

  brute_force_matches = gt_array_new(sizeof (unsigned long));
  bmh_matches = gt_array_new(sizeof (unsigned long));
  kmp_matches = gt_array_new(sizeof (unsigned long));
  shift_and_matches = gt_array_new(sizeof (unsigned long));

  /* match the empty pattern */
  text_len = strlen(text);
  gt_string_matching_brute_force(text, text_len, "", 0, store_match,
                                 brute_force_matches);
  gt_string_matching_bmh(text, text_len, "", 0, store_match, bmh_matches);
  gt_string_matching_kmp(text, text_len, "", 0, store_match, kmp_matches);
  gt_string_matching_shift_and(text, text_len, "", 0, store_match,
                               shift_and_matches);

  ensure(had_err, !gt_array_size(brute_force_matches));
  ensure(had_err, !gt_array_size(bmh_matches));
  ensure(had_err, !gt_array_size(kmp_matches));
  ensure(had_err, !gt_array_size(shift_and_matches));

  round = 0;
  while (!had_err && round < STRING_MATCHING_NUM_OF_TESTS) {
    unsigned long pos, subject_len, pattern_len;

    /* generate random string and pattern (both lengths are drawn before
       any character is generated) */
    subject_len = gt_rand_max(STRING_MATCHING_MAX_STRING_LENGTH);
    pattern_len = gt_rand_max(STRING_MATCHING_MAX_PATTERN_LENGTH);
    for (pos = 0; pos < subject_len; pos++)
      subject[pos] = gt_rand_char();
    subject[subject_len] = '\0';
    for (pos = 0; pos < pattern_len; pos++)
      pattern[pos] = gt_rand_char();
    pattern[pattern_len] = '\0';

    /* matching (first match) */
    brute_force_match = GT_UNDEF_ULONG;
    bmh_match = GT_UNDEF_ULONG;
    kmp_match = GT_UNDEF_ULONG;
    shift_and_match = GT_UNDEF_ULONG;
    gt_string_matching_brute_force(subject, subject_len, pattern, pattern_len,
                                   store_first_match, &brute_force_match);
    gt_string_matching_bmh(subject, subject_len, pattern, pattern_len,
                           store_first_match, &bmh_match);
    gt_string_matching_kmp(subject, subject_len, pattern, pattern_len,
                           store_first_match, &kmp_match);
    gt_string_matching_shift_and(subject, subject_len, pattern, pattern_len,
                                 store_first_match, &shift_and_match);

    /* comparing (first match) */
    ensure(had_err, brute_force_match == bmh_match);
    ensure(had_err, brute_force_match == kmp_match);
    ensure(had_err, brute_force_match == shift_and_match);

    /* matching (all matches) */
    gt_string_matching_brute_force(subject, subject_len, pattern, pattern_len,
                                   store_match, brute_force_matches);
    gt_string_matching_bmh(subject, subject_len, pattern, pattern_len,
                           store_match, bmh_matches);
    gt_string_matching_kmp(subject, subject_len, pattern, pattern_len,
                           store_match, kmp_matches);
    gt_string_matching_shift_and(subject, subject_len, pattern, pattern_len,
                                 store_match, shift_and_matches);

    /* comparing (all matches) */
    ensure(had_err, gt_array_size(brute_force_matches) ==
                    gt_array_size(bmh_matches));
    ensure(had_err, gt_array_size(brute_force_matches) ==
                    gt_array_size(kmp_matches));
    ensure(had_err, gt_array_size(brute_force_matches) ==
                    gt_array_size(shift_and_matches));
    ensure(had_err, !gt_array_cmp(brute_force_matches, bmh_matches));
    ensure(had_err, !gt_array_cmp(brute_force_matches, kmp_matches));
    ensure(had_err, !gt_array_cmp(brute_force_matches, shift_and_matches));

    /* reset */
    gt_array_reset(brute_force_matches);
    gt_array_reset(bmh_matches);
    gt_array_reset(kmp_matches);
    gt_array_reset(shift_and_matches);

    round++;
  }

  gt_array_delete(shift_and_matches);
  gt_array_delete(bmh_matches);
  gt_array_delete(kmp_matches);
  gt_array_delete(brute_force_matches);

  return had_err;
}
/*
 * Unit test for the interval tree.
 *
 * Part 1 fills a tree (which frees its node data with gt_free_func) with
 * random ranges and checks
 *   - gt_interval_tree_find_first_overlapping(): a returned node must
 *     overlap the query; if nothing is returned, a linear search over all
 *     ranges must not find an overlap either;
 *   - gt_interval_tree_find_all_overlapping(): after sorting, the result
 *     must equal the list obtained by a linear search.
 *
 * Part 2 fills a second tree (no free function) with nodes whose data is
 * the insertion index, then removes randomly chosen nodes one by one and
 * checks after each removal that the tree size shrank by one and that no
 * remaining node carries the data value of the removed node.
 *
 * Returns 0 on success, the value set by gt_ensure() otherwise.
 */
int gt_interval_tree_unit_test(GT_UNUSED GtError *err)
{
  /* test dimensions; unsigned because they are compared with unsigned
     counters and container sizes throughout */
  const unsigned long num_testranges = 3000,
                      num_samples = 300000,
                      num_find_all_samples = 10000,
                      gt_range_max_basepos = 90000,
                      width = 700,
                      query_width = 5000;
  GtIntervalTree *it = NULL;
  GtIntervalTreeNode *res = NULL;
  unsigned long i = 0;
  int had_err = 0;
  GtRange *res_rng = NULL,
          qrange;
  GtArray *arr = NULL,
          *narr = NULL;

  arr = gt_array_new(sizeof (GtRange*));

  /* generate test ranges */
  for (i = 0; i < num_testranges; i++) {
    unsigned long start;
    GtRange *rng;
    rng = gt_calloc(1, sizeof (GtRange));
    start = gt_rand_max(gt_range_max_basepos);
    rng->start = start;
    rng->end = start + gt_rand_max(width);
    gt_array_add(arr, rng);
  }

  /* the tree is created with gt_free_func, so the ranges are released
     together with the tree */
  it = gt_interval_tree_new(gt_free_func);

  /* insert ranges */
  for (i = 0; i < num_testranges && !had_err; i++) {
    GtIntervalTreeNode *new_node;
    GtRange *rng;
    rng = *(GtRange**) gt_array_get(arr, i);
    new_node = gt_interval_tree_node_new(rng, rng->start, rng->end);
    gt_interval_tree_insert(it, new_node);
  }
  gt_ensure(had_err, gt_interval_tree_size(it) == num_testranges);

  /* perform test queries */
  for (i = 0; i < num_samples && !had_err; i++) {
    unsigned long start = gt_rand_max(gt_range_max_basepos);
    qrange.start = start;
    qrange.end = start + gt_rand_max(width);
    res = gt_interval_tree_find_first_overlapping(it, qrange.start,
                                                  qrange.end);
    if (res) {
      /* we have a hit, check if really overlapping */
      res_rng = (GtRange*) gt_interval_tree_node_get_data(res);
      gt_ensure(had_err, gt_range_overlap(&qrange, res_rng));
    } else {
      /* no hit, check whether there really is no overlapping
         interval in tree */
      GtRange *this_rng;
      unsigned long j;
      bool found = false;
      for (j = 0; j < gt_array_size(arr); j++) {
        this_rng = *(GtRange**) gt_array_get(arr, j);
        if (gt_range_overlap(this_rng, &qrange)) {
          found = true;
          break;
        }
      }
      gt_ensure(had_err, !found);
    }
  }

  /* test searching for all overlapping intervals */
  for (i = 0; i < num_find_all_samples && !had_err; i++) {
    unsigned long start = gt_rand_max(gt_range_max_basepos);
    GtArray *hits; /* own name: must not shadow the node pointer <res> */
    qrange.start = start;
    qrange.end = start + gt_rand_max(query_width);
    hits = gt_array_new(sizeof (GtRange*));
    gt_interval_tree_find_all_overlapping(it, qrange.start, qrange.end, hits);
    if (hits) {
      /* generate reference overlapping interval list by linear search */
      GtArray *ref;
      unsigned long j;
      ref = gt_array_new(sizeof (GtRange*));
      for (j = 0; j < gt_array_size(arr); j++) {
        GtRange *this_rng;
        this_rng = *(GtRange**) gt_array_get(arr, j);
        if (gt_range_overlap(this_rng, &qrange)) {
          gt_array_add(ref, this_rng);
        }
      }
      /* compare reference with interval tree query result */
      gt_array_sort_stable(ref, range_ptr_compare);
      gt_array_sort_stable(hits, range_ptr_compare);
      /* must be equal */
      gt_ensure(had_err, gt_array_cmp(ref, hits)==0);
      gt_array_delete(ref);
    }
    gt_array_delete(hits);
  }
  gt_interval_tree_delete(it);

  /* second tree: no free function, node data is the insertion index */
  it = gt_interval_tree_new(NULL);
  /* the pointers in <arr> are not used any more after the first tree has
     been deleted */
  gt_array_reset(arr);

  /* generate test ranges */
  for (i = 0; i < num_testranges && !had_err; i++) {
    unsigned long start;
    GtIntervalTreeNode *new_node;
    start = gt_rand_max(gt_range_max_basepos);
    new_node = gt_interval_tree_node_new((void*) i, start,
                                         start + gt_rand_max(width));
    gt_interval_tree_insert(it, new_node);
  }
  gt_ensure(had_err, gt_interval_tree_size(it) == num_testranges);

  narr = gt_array_new(sizeof (GtIntervalTreeNode*));
  for (i = 0; i < num_testranges && !had_err; i++) {
    unsigned long idx, n, val;
    GtIntervalTreeNode *node = NULL;

    /* get all nodes referenced by the interval tree; start from an empty
       array, otherwise the nodes collected by the check at the end of the
       previous iteration would be contained twice */
    gt_array_reset(narr);
    interval_tree_find_all_internal(it, it->root, itree_test_get_node, 0,
                                    gt_range_max_basepos+width, narr);

    /* remove a random node */
    idx = gt_rand_max(gt_array_size(narr)-1);
    node = *(GtIntervalTreeNode**) gt_array_get(narr, idx);
    gt_ensure(had_err, node != NULL);
    val = (unsigned long) gt_interval_tree_node_get_data(node);
    gt_interval_tree_remove(it, node);
    gt_array_reset(narr);

    /* make sure that the node has disappeared */
    gt_ensure(had_err, gt_interval_tree_size(it) == num_testranges - (i+1));
    interval_tree_find_all_internal(it, it->root, itree_test_get_node, 0,
                                    gt_range_max_basepos+width, narr);
    gt_ensure(had_err, gt_array_size(narr) == num_testranges - (i+1));
    for (n = 0; !had_err && n < gt_array_size(narr); n++) {
      GtIntervalTreeNode *onode = *(GtIntervalTreeNode**)
                                                      gt_array_get(narr, n);
      gt_ensure(had_err,
                (unsigned long) gt_interval_tree_node_get_data(onode) != val);
    }
  }

  gt_array_delete(arr);
  gt_array_delete(narr);
  gt_interval_tree_delete(it);
  return had_err;
}