int main(int argc, char **argv) { if ((argc != 5)) { errx(1, "usage:\t%s <index dir> <chrom> <start> <end>\n", argv[0]); } char *index_dir = argv[1]; char *chrom = argv[2]; uint32_t start = atoi(argv[3]); uint32_t end = atoi(argv[4]); struct giggle_index *gi = giggle_load(index_dir, uint32_t_ll_giggle_set_data_handler); struct giggle_query_result *gqr = giggle_query(gi, chrom, start, end, NULL); uint32_t i; for(i = 0; i < gqr->num_files; i++) { char *result; struct giggle_query_iter *gqi = giggle_get_query_itr(gqr, i); while (giggle_query_next(gqi, &result) == 0) { printf("%s\n", result); } giggle_iter_destroy(&gqi); } giggle_index_destroy(&gi); cache.destroy(); }
int main(int argc, char **argv) { WAH_SIZE = 32; WAH_MAX_FILL_WORDS = (1<<(WAH_SIZE-1)) - 1; uint32_t num_chrms = 100; if ((argc != 5)) { errx(1, "usage:\t%s <index dir> <region> <w|i> <n>", argv[0]); } char *index_dir = argv[1]; char *region_s = argv[2]; char *i_type = argv[3]; uint32_t N = atoi(argv[4]); char *chrm = region_s; uint32_t start = 0, end = 0; uint32_t i, len = strlen(region_s); for (i = 0; i < len; ++i) { if (region_s[i] == ':') { region_s[i] = '\0'; start = atoi(region_s + i + 1); } else if (region_s[i] == '-') { region_s[i] = '\0'; end = atoi(region_s + i + 1); break; } } struct giggle_index *gi; if (i_type[0] == 'i') { gi = giggle_load(index_dir, uint32_t_ll_giggle_set_data_handler); struct uint32_t_ll *R = (struct uint32_t_ll *)giggle_query_region(gi, chrm, start, end); #if 0 for (i = 0; i < N; ++i) { start += 10; end += 10; char *rand_chr = NULL; //asprintf(&rand_chr, "chr%u", 1 + rand() %10); //fprintf(stderr, "%s:%u-%u\n", rand_chr, start, end); struct uint32_t_ll *R = (struct uint32_t_ll *)giggle_query_region(gi, chrm, start, end); /* if (R != NULL) printf("Hits:%u\n", R->len); else printf("Hits:0\n"); */ free(R); } #endif } else { gi = giggle_load(index_dir, wah_giggle_set_data_handler); for (i = 0; i < N; ++i) { start += 10; end += 10; char *rand_chr = NULL; //asprintf(&rand_chr, "chr%u", 1 + rand() %10); //fprintf(stderr, "%s:%u-%u\n", rand_chr, start, end); uint8_t *R = (uint8_t *)giggle_query_region(gi, chrm, start, end); /* if (R != NULL) printf("Hits:%u\n", wah_get_ints_count(R)); else printf("Hits:0\n"); */ free(R); } } giggle_index_destroy(&gi); cache.destroy(); }
int main(int argc, char **argv) { uint32_t num_chrms = 100; if ((argc != 4)) { errx(1, "usage:\t%s <index dir> <region> <w|i>", argv[0]); } char *index_dir = argv[1]; char *region_s = argv[2]; char *i_type = argv[3]; struct giggle_index *gi; gi = giggle_load(index_dir, uint32_t_ll_giggle_set_data_handler); #if 0 char *chrm = region_s; uint32_t start = 0, end = 0; uint32_t i, len = strlen(region_s); for (i = 0; i < len; ++i) { if (region_s[i] == ':') { region_s[i] = '\0'; start = atoi(region_s + i + 1); } else if (region_s[i] == '-') { region_s[i] = '\0'; end = atoi(region_s + i + 1); break; } } struct giggle_index *gi; if (i_type[0] == 'i') { gi = giggle_load(index_dir, uint32_t_ll_giggle_set_data_handler); struct uint32_t_ll *R = (struct uint32_t_ll *)giggle_query_region(gi, chrm, start, end); if (R != NULL) printf("Hits:%u\n", R->len); else printf("Hits:0\n"); } else { gi = giggle_load(index_dir, wah_giggle_set_data_handler); uint32_t chr_id = giggle_get_chrm_id(gi, chrm); //return giggle_search(chr_id, gi->root_ids[chr_id], start, end); uint32_t domain = chr_id; uint32_t root_id = gi->root_ids[chr_id]; uint32_t leaf_start_id; int pos_start_id; uint32_t nld_start_id = bpt_find(domain, root_id, &leaf_start_id, &pos_start_id, start); fprintf(stderr, "nld_start_id:%u\t" "leaf_start_id:%u\t" "pos_start_id:%u\n", nld_start_id, leaf_start_id, pos_start_id); struct bpt_node *leaf_start = cache.get(domain, leaf_start_id - 1, &bpt_node_cache_handler); bpt_print_node(leaf_start); struct wah_bpt_non_leading_data *nld = cache.get(domain, BPT_POINTERS(leaf_start)[0] - 1, &wah_non_leading_cache_handler); fprintf(stderr, "WAH_LEN:%u\t" "wah_get_ints_count:%u\t" "\n", WAH_LEN(nld->SA), wah_get_ints_count(nld->SA)); uint32_t *R = NULL; uint32_t R_len = wah_get_ints(nld->SA, &R); uint32_t i; for (i = 0; i < R_len; ++i) { fprintf(stderr, "%u:%u\n", i, R[i]); } /* uint8_t *R = (uint8_t *)giggle_query_region(gi, chrm, start, end); if (R != NULL) printf("Hits:%u\n", wah_get_ints_count(R)); else printf("Hits:0\n"); */ } #endif giggle_index_destroy(&gi); cache.destroy(); }
int main(int argc, char **argv) { WAH_SIZE = 32; WAH_MAX_FILL_WORDS = (1<<(WAH_SIZE-1)) - 1; uint32_t num_chrms = 100; if ((argc != 4)) { errx(1, "usage:\t%s <input file> <index dir> <w|i>", argv[0]); } double genome_size = 3095677412.0; char *input_file = argv[1]; char *index_dir = argv[2]; char *i_type = argv[3]; struct input_file *in_f = input_file_init(input_file); int chrm_len = 50; char *chrm = (char *)malloc(chrm_len*sizeof(char)); uint32_t start, end; long offset; struct giggle_index *gi; gi = giggle_load(index_dir, uint32_t_ll_giggle_set_data_handler); uint32_t *file_counts = (uint32_t *) calloc(gi->file_index->num, sizeof(uint32_t)); uint32_t num_intervals = 0; double mean_interval_size = 0.0; while ( in_f->input_file_get_next_interval(in_f, &chrm, &chrm_len, &start, &end, &offset) >= 0 ) { num_intervals += 1; mean_interval_size += end - start; struct uint32_t_ll *R = (struct uint32_t_ll *)giggle_query_region(gi, chrm, start, end); if (R != NULL) { struct uint32_t_ll_node *curr = R->head; while (curr != NULL) { /* struct file_id_offset_pair *fid_off = (struct file_id_offset_pair *) unordered_list_get(gi->offset_index, curr->val); */ struct file_id_offset_pair fid_off = gi->offset_index->vals[curr->val]; struct file_data *fd = (struct file_data *) unordered_list_get(gi->file_index, fid_off.file_id); file_counts[fid_off.file_id] += 1; curr = curr->next; } uint32_t_ll_free((void **)&R); } } mean_interval_size = mean_interval_size/num_intervals; struct doubles_uint32_t_tuple *sig = (struct doubles_uint32_t_tuple *) calloc(gi->file_index->num, sizeof(struct doubles_uint32_t_tuple)); uint32_t i; for (i = 0; i < gi->file_index->num; ++i) { struct file_data *fd = (struct file_data *) unordered_list_get(gi->file_index, i); long long n11 = (long long)(file_counts[i]); long long n12 = (long long)(MAX(0,num_intervals - file_counts[i])); long long n21 = (long long)(MAX(0,fd->num_intervals - file_counts[i])); double comp_mean = ((fd->mean_interval_size+mean_interval_size)); long long n22_full = (long long) MAX(n11 + n12 + n21, genome_size/comp_mean); long long n22 = MAX(0, n22_full - (n11 + n12 + n21)); double left, right, two; double r = kt_fisher_exact(n11, n12, n21, n22, &left, &right, &two); double ratio = (((double)n11/(double)n12) / ((double)n21/(double)n22)); //fprintf(stderr, "%s\t%f\n", fd->file_name, two); sig[i].d1 = right; sig[i].d2 = ratio; sig[i].u1 = i; sig[i].u2 = file_counts[i]; } qsort(sig, gi->file_index->num, sizeof(struct doubles_uint32_t_tuple), doubles_uint32_t_tuple_cmp); for (i = 0; i < gi->file_index->num; ++i) { struct file_data *fd = (struct file_data *) unordered_list_get(gi->file_index, sig[i].u1); /* printf("%s\t" "right:%f\t" "%f\n", fd->file_name, sig[i].d1, sig[i].d2); */ printf( "sig:%f\t" "size:%u\t" "overlap:%u\t" "ratio:%f\t" "%s\n", sig[i].d1, fd->num_intervals, sig[i].u2, sig[i].d2, fd->file_name); } giggle_index_destroy(&gi); cache.destroy(); }