//{{{ void test_unordered_list_store_load_file_id_offset_pair(void) void test_unordered_list_store_load_file_id_offset_pair(void) { struct unordered_list *ul = unordered_list_init(10); uint32_t uints[20]; long longs[20]; uint32_t i; for (i = 0; i < 20; ++i) { uints[i] = rand(); longs[i] = rand(); struct file_id_offset_pair *p = (struct file_id_offset_pair *) malloc(sizeof(struct file_id_offset_pair)); p->file_id = uints[i]; p->offset = longs[i]; unordered_list_add(ul, p); } for (i = 0; i < 20; ++i) { struct file_id_offset_pair *p = unordered_list_get(ul, i); TEST_ASSERT_EQUAL(uints[i], p->file_id); TEST_ASSERT_EQUAL(longs[i], p->offset); } char *file_name = "test_unordered_list_store_load_file_id_offset_pair.dat"; FILE *f = fopen(file_name, "wb"); unordered_list_store(ul, f, file_name, file_id_offset_pair_store); fclose(f); f = fopen(file_name, "rb"); struct unordered_list *ul_2 = unordered_list_load(f, file_name, file_id_offset_pair_load); for (i = 0; i < 20; ++i) { struct file_id_offset_pair *p = unordered_list_get(ul, i); struct file_id_offset_pair *p_2 = unordered_list_get(ul_2, i); TEST_ASSERT_EQUAL(p->file_id, p_2->file_id); TEST_ASSERT_EQUAL(p->offset, p_2->offset); } unordered_list_destroy(&ul, free_wrapper); unordered_list_destroy(&ul_2, free_wrapper); remove("test_unordered_list_store_load_file_id_offset_pair.dat"); }
//{{{ void test_unordered_list_store_c_str(void) void test_unordered_list_store_load_c_str(void) { struct unordered_list *ul = unordered_list_init(5); char *A[10]; asprintf(&(A[0]), "zero"); asprintf(&(A[1]), "one"); asprintf(&(A[2]), "two"); asprintf(&(A[3]), "three"); asprintf(&(A[4]), "four"); asprintf(&(A[5]), "five"); asprintf(&(A[6]), "six"); asprintf(&(A[7]), "seven"); asprintf(&(A[8]), "eight"); asprintf(&(A[9]), "nine"); uint32_t i; for (i = 0; i < 10; ++i) unordered_list_add(ul, A[i]); for (i = 0; i < 10; ++i) { char *s = unordered_list_get(ul, i); TEST_ASSERT_TRUE(strcmp(A[i], s) == 0) } char *file_name = "test_unordered_list_store_load_c_str.dat"; FILE *f = fopen(file_name, "wb"); unordered_list_store(ul, f, file_name, c_str_store); fclose(f); f = fopen(file_name, "rb"); struct unordered_list *ul_2 = unordered_list_load(f, file_name, c_str_load); for (i = 0; i < 10; ++i) { char *s = unordered_list_get(ul, i); char *s_2 = unordered_list_get(ul_2, i); TEST_ASSERT_TRUE(strcmp(s, s_2) == 0) } unordered_list_destroy(&ul, free_wrapper); unordered_list_destroy(&ul_2, free_wrapper); remove(file_name); }
//{{{void test_unordered_list_get(void)
/*
 * Exercises unordered_list_add / unordered_list_get with plain ints.
 *
 * Pointers to 20 stack ints (2, 4, ..., 40) are added to a list created with
 * unordered_list_init(10). Each add is expected to return the element's
 * index, each get is expected to hand back a pointer to the matching value,
 * and a get at index 5000 is expected to return NULL. The list is destroyed
 * with a NULL element destructor because the elements live on the stack.
 */
void test_unordered_list_get(void)
{
    int values[20];
    int idx;
    struct unordered_list *list = unordered_list_init(10);

    idx = 0;
    while (idx < 20) {
        values[idx] = 2 * (idx + 1);
        ++idx;
    }

    idx = 0;
    while (idx < 20) {
        TEST_ASSERT_EQUAL(idx, unordered_list_add(list, (void *)&values[idx]));
        ++idx;
    }

    idx = 0;
    while (idx < 20) {
        int *stored = (int *)unordered_list_get(list, idx);
        TEST_ASSERT_EQUAL(values[idx], (int)(*stored));
        ++idx;
    }

    /* An index far past the last element must not yield an entry. */
    void *missing = unordered_list_get(list, 5000);
    TEST_ASSERT_EQUAL(NULL, missing);

    unordered_list_destroy(&list, NULL);
}
void test_get_file_stats(void) { struct input_file *i = input_file_init("../data/1k.unsort.bed.gz"); struct unordered_list *file_index = unordered_list_init(1); struct file_data *fd = (struct file_data *) calloc(1, sizeof(struct file_data)); uint32_t file_id = unordered_list_add(file_index, fd); fd->file_name = strdup("../data/1k.unsort.bed.gz"); fd->num_intervals = 0; fd->mean_interval_size = 0; int chrm_len = 10; char *chrm = (char *)malloc(chrm_len*sizeof(char)); uint32_t start, end; long offset; uint32_t j = 0; struct file_id_offset_pair *p; uint32_t intrv_id; while (input_file_get_next_interval(i, &chrm, &chrm_len, &start, &end, &offset) >= 0) { fd->mean_interval_size += end-start; fd->num_intervals += 1; } fd->mean_interval_size = fd->mean_interval_size/fd->num_intervals; input_file_destroy(&i); free(chrm); char *out_file_name = "test_file_data_read_write.tmp"; FILE *f = fopen(out_file_name, "wb"); unordered_list_store(file_index, f, out_file_name, file_data_store); fclose(f); f = fopen(out_file_name, "rb"); struct unordered_list *file_index_r = unordered_list_load(f, out_file_name, file_data_load); struct file_data *fd_r = (struct file_data *) unordered_list_get(file_index_r, file_id); TEST_ASSERT_EQUAL(0, strcmp(fd->file_name, fd_r->file_name)); TEST_ASSERT_EQUAL(fd->num_intervals, fd_r->num_intervals); TEST_ASSERT_EQUAL(fd->mean_interval_size, fd_r->mean_interval_size); unordered_list_destroy(&file_index, file_data_free); unordered_list_destroy(&file_index_r, file_data_free); remove(out_file_name); }
int main(int argc, char **argv) { WAH_SIZE = 32; WAH_MAX_FILL_WORDS = (1<<(WAH_SIZE-1)) - 1; uint32_t num_chrms = 100; if ((argc != 4)) { errx(1, "usage:\t%s <input file> <index dir> <w|i>", argv[0]); } double genome_size = 3095677412.0; char *input_file = argv[1]; char *index_dir = argv[2]; char *i_type = argv[3]; struct input_file *in_f = input_file_init(input_file); int chrm_len = 50; char *chrm = (char *)malloc(chrm_len*sizeof(char)); uint32_t start, end; long offset; struct giggle_index *gi; gi = giggle_load(index_dir, uint32_t_ll_giggle_set_data_handler); uint32_t *file_counts = (uint32_t *) calloc(gi->file_index->num, sizeof(uint32_t)); uint32_t num_intervals = 0; double mean_interval_size = 0.0; while ( in_f->input_file_get_next_interval(in_f, &chrm, &chrm_len, &start, &end, &offset) >= 0 ) { num_intervals += 1; mean_interval_size += end - start; struct uint32_t_ll *R = (struct uint32_t_ll *)giggle_query_region(gi, chrm, start, end); if (R != NULL) { struct uint32_t_ll_node *curr = R->head; while (curr != NULL) { /* struct file_id_offset_pair *fid_off = (struct file_id_offset_pair *) unordered_list_get(gi->offset_index, curr->val); */ struct file_id_offset_pair fid_off = gi->offset_index->vals[curr->val]; struct file_data *fd = (struct file_data *) unordered_list_get(gi->file_index, fid_off.file_id); file_counts[fid_off.file_id] += 1; curr = curr->next; } uint32_t_ll_free((void **)&R); } } mean_interval_size = mean_interval_size/num_intervals; struct doubles_uint32_t_tuple *sig = (struct doubles_uint32_t_tuple *) calloc(gi->file_index->num, sizeof(struct doubles_uint32_t_tuple)); uint32_t i; for (i = 0; i < gi->file_index->num; ++i) { struct file_data *fd = (struct file_data *) unordered_list_get(gi->file_index, i); long long n11 = (long long)(file_counts[i]); long long n12 = (long long)(MAX(0,num_intervals - file_counts[i])); long long n21 = (long long)(MAX(0,fd->num_intervals - file_counts[i])); double comp_mean = 
((fd->mean_interval_size+mean_interval_size)); long long n22_full = (long long) MAX(n11 + n12 + n21, genome_size/comp_mean); long long n22 = MAX(0, n22_full - (n11 + n12 + n21)); double left, right, two; double r = kt_fisher_exact(n11, n12, n21, n22, &left, &right, &two); double ratio = (((double)n11/(double)n12) / ((double)n21/(double)n22)); //fprintf(stderr, "%s\t%f\n", fd->file_name, two); sig[i].d1 = right; sig[i].d2 = ratio; sig[i].u1 = i; sig[i].u2 = file_counts[i]; } qsort(sig, gi->file_index->num, sizeof(struct doubles_uint32_t_tuple), doubles_uint32_t_tuple_cmp); for (i = 0; i < gi->file_index->num; ++i) { struct file_data *fd = (struct file_data *) unordered_list_get(gi->file_index, sig[i].u1); /* printf("%s\t" "right:%f\t" "%f\n", fd->file_name, sig[i].d1, sig[i].d2); */ printf( "sig:%f\t" "size:%u\t" "overlap:%u\t" "ratio:%f\t" "%s\n", sig[i].d1, fd->num_intervals, sig[i].u2, sig[i].d2, fd->file_name); } giggle_index_destroy(&gi); cache.destroy(); }