Example #1
0
/*
 * Parse TEXT as a plain base-10 number that fits in a uint32_t.
 *
 * WHAT names the argument in the error message.  Input that does not start
 * with a digit, carries trailing characters, or exceeds UINT32_MAX ends the
 * program through errx(); atoi() would have silently produced 0 or a wrapped
 * value for such input.
 */
static uint32_t parse_coordinate(const char *text, const char *what)
{
    char *stop = NULL;
    unsigned long long value = 0;
    int ok = (text[0] >= '0' && text[0] <= '9');

    /* Signs and leading blanks are rejected up front: strtoull() would accept
     * them and negate the result. */
    if (ok) {
        /* On overflow strtoull() saturates at ULLONG_MAX, which the range
         * check below rejects as well. */
        value = strtoull(text, &stop, 10);
        ok = (*stop == '\0' && value <= UINT32_MAX);
    }

    if (!ok) {
        errx(1,
             "invalid %s '%s': expected an integer between 0 and 4294967295",
             what,
             text);
    }

    return (uint32_t)value;
}

/*
 * usage: <prog> <index dir> <chrom> <start> <end>
 *
 * Loads the giggle index stored in <index dir>, runs one query for
 * <chrom>:<start>-<end>, and prints every string produced by the per-file
 * result iterators, one per line.
 */
int main(int argc, char **argv)
{
    if (argc != 5) {
        errx(1,
             "usage:\t%s <index dir> <chrom> <start> <end>\n",
             argv[0]);
    }

    char *index_dir = argv[1];
    char *chrom = argv[2];
    uint32_t start = parse_coordinate(argv[3], "start");
    uint32_t end = parse_coordinate(argv[4], "end");

    struct giggle_index *gi = giggle_load(index_dir,
                                          uint32_t_ll_giggle_set_data_handler);

    struct giggle_query_result *gqr = giggle_query(gi, chrom, start, end, NULL);

    /* One iterator per indexed file; drain each and echo its records. */
    for (uint32_t i = 0; i < gqr->num_files; i++) {
        char *result = NULL;
        struct giggle_query_iter *gqi = giggle_get_query_itr(gqr, i);

        while (giggle_query_next(gqi, &result) == 0) {
            printf("%s\n", result);
        }
        giggle_iter_destroy(&gqi);
    }

    /* NOTE(review): gqr is never released here; its destroy routine is not
     * visible in this file -- confirm and add the call. */
    giggle_index_destroy(&gi);
    cache.destroy();
}
Example #2
0
/*
 * Parse TEXT as a plain base-10 number that fits in a uint32_t.
 *
 * WHAT names the field in the error message.  Input that does not start with
 * a digit, carries trailing characters, or exceeds UINT32_MAX ends the
 * program through errx() instead of being read as 0 or a wrapped value the
 * way atoi() would.
 */
static uint32_t parse_u32_field(const char *text, const char *what)
{
    char *stop = NULL;
    unsigned long long value = 0;
    int ok = (text[0] >= '0' && text[0] <= '9');

    /* Signs and leading blanks are rejected up front: strtoull() would accept
     * them and negate the result. */
    if (ok) {
        /* On overflow strtoull() saturates at ULLONG_MAX, which the range
         * check below rejects as well. */
        value = strtoull(text, &stop, 10);
        ok = (*stop == '\0' && value <= UINT32_MAX);
    }

    if (!ok) {
        errx(1,
             "invalid %s '%s': expected an integer between 0 and 4294967295",
             what,
             text);
    }

    return (uint32_t)value;
}

/*
 * usage: <prog> <index dir> <region> <w|i> <n>
 *
 * <region> has the form <chrom>:<start>-<end>.
 *
 * With index type 'i' the index is loaded with the uint32_t_ll handler and
 * the region is queried once.  With any other type the index is loaded with
 * the WAH handler and the region is queried <n> times, sliding the window 10
 * positions further before each query.  Query results are discarded.
 */
int main(int argc, char **argv)
{
    WAH_SIZE = 32;
    /* Shift an unsigned 1: with WAH_SIZE == 32, `1 << 31` overflows a signed
     * int, which is undefined behaviour. */
    WAH_MAX_FILL_WORDS = (UINT32_C(1) << (WAH_SIZE - 1)) - 1;

    if (argc != 5) {
        errx(1,
             "usage:\t%s <index dir> <region> <w|i> <n>",
             argv[0]);
    }

    char *index_dir = argv[1];
    char *region_s = argv[2];
    char *i_type = argv[3];
    uint32_t N = parse_u32_field(argv[4], "<n>");

    /* Split "<chrom>:<start>-<end>" in place.  The '-' is searched for only
     * after the ':' so that a dash inside the chromosome name cannot cut the
     * name short. */
    char *colon = strchr(region_s, ':');
    char *dash = (colon != NULL) ? strchr(colon + 1, '-') : NULL;
    if (dash == NULL || colon == region_s) {
        errx(1,
             "invalid region '%s': expected <chrom>:<start>-<end>",
             region_s);
    }
    *colon = '\0';
    *dash = '\0';

    char *chrm = region_s;
    uint32_t start = parse_u32_field(colon + 1, "region start");
    uint32_t end = parse_u32_field(dash + 1, "region end");

    struct giggle_index *gi;
    if (i_type[0] == 'i') {
        gi = giggle_load(index_dir,
                         uint32_t_ll_giggle_set_data_handler);

        /* Single query; <n> is not used for this index type.
         * NOTE(review): the returned list is not released here -- confirm
         * which routine should free it. */
        (void)giggle_query_region(gi, chrm, start, end);
    } else {
        gi = giggle_load(index_dir,
                         wah_giggle_set_data_handler);

        for (uint32_t i = 0; i < N; ++i) {
            /* The window moves before the first query as well. */
            start += 10;
            end += 10;

            uint8_t *R = (uint8_t *)giggle_query_region(gi,
                                                        chrm,
                                                        start,
                                                        end);
            free(R);
        }
    }

    giggle_index_destroy(&gi);
    cache.destroy();
}
Example #3
0
/*
 * usage: <prog> <index dir> <region> <w|i>
 *
 * As compiled, this only loads the giggle index in <index dir> with the
 * uint32_t_ll handler and destroys it again; <region> and <w|i> are read by
 * the disabled inspection code alone.
 *
 * Switching the `#if 0` below to `#if 1` replaces the plain load with that
 * inspection code, which loads the index according to <w|i> and dumps the
 * query result (uint32_t_ll) or B+-tree leaf details (WAH) for <region>.
 */
int main(int argc, char **argv)
{
    if ((argc != 4)) {
        errx(1,
             "usage:\t%s <index dir> <region> <w|i>",
             argv[0]);
    }

    char *index_dir = argv[1];
    char *region_s = argv[2];
    char *i_type = argv[3];

    struct giggle_index *gi;

#if 0
    /* Split "<chrom>:<start>-<end>" in place. */
    char *chrm = region_s;
    uint32_t start = 0, end = 0;
    uint32_t i, len = strlen(region_s);
    
    for (i = 0; i < len; ++i) {
        if (region_s[i] == ':') {
            region_s[i] = '\0';
            start = atoi(region_s + i + 1);
        } else if (region_s[i] == '-') {
            region_s[i] = '\0';
            end = atoi(region_s + i + 1);
            break;
        }
    }

    if (i_type[0] == 'i') {
        gi = giggle_load(index_dir,
                         uint32_t_ll_giggle_set_data_handler);

        struct uint32_t_ll *R =
                (struct uint32_t_ll *)giggle_query_region(gi,
                                                          chrm,
                                                          start,
                                                          end);

        if (R != NULL)
            printf("Hits:%u\n", R->len);
        else
            printf("Hits:0\n");

    } else {
        gi = giggle_load(index_dir,
                         wah_giggle_set_data_handler);

        uint32_t chr_id = giggle_get_chrm_id(gi, chrm);
        //return giggle_search(chr_id, gi->root_ids[chr_id], start, end);
        
        uint32_t domain = chr_id;
        uint32_t root_id = gi->root_ids[chr_id];

        uint32_t leaf_start_id;
        int pos_start_id;

        uint32_t nld_start_id = bpt_find(domain,
                                         root_id,
                                         &leaf_start_id,
                                         &pos_start_id,
                                         start);
        /* pos_start_id is an int, hence %d. */
        fprintf(stderr,
                "nld_start_id:%u\t"
                "leaf_start_id:%u\t"
                "pos_start_id:%d\n",
                nld_start_id,
                leaf_start_id,
                pos_start_id);

        struct bpt_node *leaf_start = cache.get(domain,
                                                leaf_start_id - 1,
                                                &bpt_node_cache_handler);
        bpt_print_node(leaf_start);

        
        struct wah_bpt_non_leading_data *nld = 
                cache.get(domain,
                          BPT_POINTERS(leaf_start)[0] - 1,
                          &wah_non_leading_cache_handler);

        fprintf(stderr,
                "WAH_LEN:%u\t"
                "wah_get_ints_count:%u\t"
                "\n",
                WAH_LEN(nld->SA),
                wah_get_ints_count(nld->SA));
            
        uint32_t *R = NULL;
        uint32_t R_len = wah_get_ints(nld->SA, &R);

        uint32_t i;
        for (i = 0; i < R_len; ++i) {
            fprintf(stderr, "%u:%u\n", i, R[i]);
        }

        /*
        uint8_t *R = (uint8_t *)giggle_query_region(gi,
                                                    chrm,
                                                    start,
                                                    end);
        if (R != NULL)
            printf("Hits:%u\n", wah_get_ints_count(R));
        else
            printf("Hits:0\n");
        */

    }
#else
    /* Not consulted on this path. */
    (void)region_s;
    (void)i_type;

    gi = giggle_load(index_dir,
                     uint32_t_ll_giggle_set_data_handler);
#endif

    giggle_index_destroy(&gi);
    cache.destroy();
}
Example #4
0
int main(int argc, char **argv)
{
    WAH_SIZE = 32;
    WAH_MAX_FILL_WORDS = (1<<(WAH_SIZE-1)) - 1;

    uint32_t num_chrms = 100;

    if ((argc != 4)) {
        errx(1,
             "usage:\t%s <input file> <index dir> <w|i>",
             argv[0]);
    }

    double genome_size =  3095677412.0;

    char *input_file = argv[1];
    char *index_dir = argv[2];
    char *i_type = argv[3];

    struct input_file *in_f = input_file_init(input_file);

    int chrm_len = 50;
    char *chrm = (char *)malloc(chrm_len*sizeof(char));
    uint32_t start, end;
    long offset;

    struct giggle_index *gi;

    gi = giggle_load(index_dir,
                     uint32_t_ll_giggle_set_data_handler);

    uint32_t *file_counts = (uint32_t *)
            calloc(gi->file_index->num, sizeof(uint32_t));

    uint32_t num_intervals = 0;
    double mean_interval_size = 0.0;
    while ( in_f->input_file_get_next_interval(in_f, 
                                               &chrm,
                                               &chrm_len,
                                               &start,
                                               &end,
                                               &offset) >= 0 ) {
        num_intervals += 1;
        mean_interval_size += end - start;

        struct uint32_t_ll *R =
                (struct uint32_t_ll *)giggle_query_region(gi,
                                                          chrm,
                                                          start,
                                                          end);
        if (R != NULL) {
            struct uint32_t_ll_node *curr = R->head;

            while (curr != NULL) {
                /*
                struct file_id_offset_pair *fid_off = 
                    (struct file_id_offset_pair *)
                    unordered_list_get(gi->offset_index, curr->val);
                */
                struct file_id_offset_pair fid_off = 
                    gi->offset_index->vals[curr->val];
                struct file_data *fd = 
                    (struct file_data *)
                    unordered_list_get(gi->file_index, fid_off.file_id);

                file_counts[fid_off.file_id] += 1;

                curr = curr->next;
            }
            uint32_t_ll_free((void **)&R);
        }
    }

    mean_interval_size = mean_interval_size/num_intervals;

    struct doubles_uint32_t_tuple *sig = (struct doubles_uint32_t_tuple *)
        calloc(gi->file_index->num, sizeof(struct doubles_uint32_t_tuple));

    uint32_t i;
    for (i = 0; i < gi->file_index->num; ++i) {
        struct file_data *fd = 
            (struct file_data *)
            unordered_list_get(gi->file_index, i);

        long long n11 = (long long)(file_counts[i]);
        long long n12 = (long long)(MAX(0,num_intervals - file_counts[i]));
        long long n21 = (long long)(MAX(0,fd->num_intervals - file_counts[i]));
        double comp_mean = ((fd->mean_interval_size+mean_interval_size));
        long long n22_full = (long long)
            MAX(n11 + n12 + n21, genome_size/comp_mean);
        long long n22 = MAX(0, n22_full - (n11 + n12 + n21));
        double left, right, two;
        double r = kt_fisher_exact(n11, n12, n21, n22, &left, &right, &two);

        double ratio = (((double)n11/(double)n12) / ((double)n21/(double)n22));

        //fprintf(stderr, "%s\t%f\n", fd->file_name, two);
        sig[i].d1 = right;
        sig[i].d2 = ratio;
        sig[i].u1 = i;
        sig[i].u2 = file_counts[i];
    }

    qsort(sig,
          gi->file_index->num,
          sizeof(struct doubles_uint32_t_tuple), 
          doubles_uint32_t_tuple_cmp);

    for (i = 0; i < gi->file_index->num; ++i) {
        struct file_data *fd = 
            (struct file_data *)
            unordered_list_get(gi->file_index, sig[i].u1);
        /*
        printf("%s\t"
               "right:%f\t"
               "%f\n", fd->file_name, sig[i].d1, sig[i].d2);
        */
        printf( "sig:%f\t"
                "size:%u\t"
                "overlap:%u\t"
                "ratio:%f\t"
                "%s\n",
                sig[i].d1,
                fd->num_intervals,
                sig[i].u2,
                sig[i].d2,
                fd->file_name);
    }

    giggle_index_destroy(&gi);
    cache.destroy();
}