Exemple #1
0
//{{{ void test_unordered_list_store_load_file_id_offset_pair(void)
/*
 * Round-trip test for an unordered list of file_id/offset pairs.
 *
 * Adds NUM_PAIRS heap-allocated pairs with random contents to a list,
 * verifies they can be read back by index, writes the list to a scratch
 * file with unordered_list_store(), reloads it with unordered_list_load(),
 * and checks that every reloaded pair matches the in-memory one.  The
 * scratch file is removed at the end.
 */
void test_unordered_list_store_load_file_id_offset_pair(void)
{
    enum { NUM_PAIRS = 20 };

    struct unordered_list *ul = unordered_list_init(10);

    uint32_t uints[NUM_PAIRS];
    long longs[NUM_PAIRS];

    uint32_t i;
    for (i = 0; i < NUM_PAIRS; ++i) {
        uints[i] = rand();
        longs[i] = rand();

        struct file_id_offset_pair *p = malloc(sizeof *p);
        /* Fail the test instead of dereferencing a NULL allocation. */
        TEST_ASSERT_EQUAL(1, p != NULL);
        p->file_id = uints[i];
        p->offset = longs[i];
        unordered_list_add(ul, p);
    }

    for (i = 0; i < NUM_PAIRS; ++i) {
        struct file_id_offset_pair *p = unordered_list_get(ul, i);
        TEST_ASSERT_EQUAL(uints[i], p->file_id);
        TEST_ASSERT_EQUAL(longs[i], p->offset);
    }

    char *file_name = "test_unordered_list_store_load_file_id_offset_pair.dat";
    FILE *f = fopen(file_name, "wb");
    TEST_ASSERT_EQUAL(1, f != NULL);

    unordered_list_store(ul, f, file_name, file_id_offset_pair_store);

    /* fclose flushes buffered output, so a failure here means a bad file. */
    TEST_ASSERT_EQUAL(0, fclose(f));

    f = fopen(file_name, "rb");
    TEST_ASSERT_EQUAL(1, f != NULL);

    struct unordered_list *ul_2 = unordered_list_load(f,
                                                    file_name,
                                                    file_id_offset_pair_load);

    /*
     * The read stream was previously leaked.
     * NOTE(review): assumes unordered_list_load() leaves the stream open
     * for the caller, mirroring the store path above -- confirm.
     */
    fclose(f);

    for (i = 0; i < NUM_PAIRS; ++i) {
        struct file_id_offset_pair *p = unordered_list_get(ul, i);
        struct file_id_offset_pair *p_2 = unordered_list_get(ul_2, i);
        TEST_ASSERT_EQUAL(p->file_id, p_2->file_id);
        TEST_ASSERT_EQUAL(p->offset, p_2->offset);
    }

    unordered_list_destroy(&ul, free_wrapper);
    unordered_list_destroy(&ul_2, free_wrapper);
    remove(file_name);
}
Exemple #2
0
//{{{ void test_unordered_list_store_load_c_str(void)
/*
 * Round-trip test for an unordered list of C strings.
 *
 * Adds NUM_STRS heap-allocated strings to a list, verifies they can be read
 * back by index, writes the list to a scratch file with
 * unordered_list_store(), reloads it with unordered_list_load(), and checks
 * that every reloaded string compares equal to the in-memory one.  The
 * scratch file is removed at the end.
 */
void test_unordered_list_store_load_c_str(void)
{
    enum { NUM_STRS = 10 };
    static const char *const words[NUM_STRS] = {
        "zero", "one", "two", "three", "four",
        "five", "six", "seven", "eight", "nine"
    };

    struct unordered_list *ul = unordered_list_init(5);

    char *A[NUM_STRS];

    uint32_t i;
    for (i = 0; i < NUM_STRS; ++i) {
        /* On failure asprintf leaves the pointer undefined, so check it. */
        int rc = asprintf(&(A[i]), "%s", words[i]);
        TEST_ASSERT_TRUE(rc >= 0);
    }

    for (i = 0; i < NUM_STRS; ++i)
        unordered_list_add(ul, A[i]);

    for (i = 0; i < NUM_STRS; ++i) {
        char *s = unordered_list_get(ul, i);
        TEST_ASSERT_TRUE(strcmp(A[i], s) == 0);
    }

    char *file_name = "test_unordered_list_store_load_c_str.dat";
    FILE *f = fopen(file_name, "wb");
    TEST_ASSERT_TRUE(f != NULL);

    unordered_list_store(ul, f, file_name, c_str_store);

    /* fclose flushes buffered output, so a failure here means a bad file. */
    TEST_ASSERT_TRUE(fclose(f) == 0);

    f = fopen(file_name, "rb");
    TEST_ASSERT_TRUE(f != NULL);

    struct unordered_list *ul_2 = unordered_list_load(f,
                                                    file_name,
                                                    c_str_load);

    /*
     * The read stream was previously leaked.
     * NOTE(review): assumes unordered_list_load() leaves the stream open
     * for the caller, mirroring the store path above -- confirm.
     */
    fclose(f);

    for (i = 0; i < NUM_STRS; ++i) {
        char *s = unordered_list_get(ul, i);
        char *s_2 = unordered_list_get(ul_2, i);
        TEST_ASSERT_TRUE(strcmp(s, s_2) == 0);
    }

    unordered_list_destroy(&ul, free_wrapper);
    unordered_list_destroy(&ul_2, free_wrapper);
    remove(file_name);
}
Exemple #3
0
//{{{void test_unordered_list_get(void)
/*
 * Adds pointers to COUNT stack-resident ints to a list, checking that each
 * unordered_list_add() call returns the element's position, then reads every
 * element back by index.  Finally checks that a lookup at index 5000 returns
 * NULL.
 */
void test_unordered_list_get(void)
{
    enum { COUNT = 20 };

    int values[COUNT];
    int idx;
    struct unordered_list *list = unordered_list_init(10);

    /* values[idx] holds 2, 4, 6, ...; each add must report index idx. */
    for (idx = 0; idx < COUNT; ++idx) {
        values[idx] = 2 * (idx + 1);
        TEST_ASSERT_EQUAL(idx,
                          unordered_list_add(list, (void *)&values[idx]));
    }

    for (idx = 0; idx < COUNT; ++idx) {
        int *stored = (int *)unordered_list_get(list, idx);
        TEST_ASSERT_EQUAL(values[idx], (int)(*stored));
    }

    void *missing = unordered_list_get(list, 5000);
    TEST_ASSERT_EQUAL(NULL, missing);

    /* The elements live on the stack, so no per-element free is passed. */
    unordered_list_destroy(&list, NULL);
}
Exemple #4
0
/*
 * Builds a one-entry file index for a BED file, fills in the entry's
 * interval count and mean interval size by scanning the file, stores the
 * index to a scratch file, loads it back, and checks that the loaded entry
 * has the same name, interval count and mean interval size.  The scratch
 * file is removed at the end.
 */
void test_get_file_stats(void)
{
    char *data_file_name = "../data/1k.unsort.bed.gz";

    struct input_file *in_f = input_file_init(data_file_name);
    struct unordered_list *file_index = unordered_list_init(1);

    struct file_data *fd = calloc(1, sizeof *fd);
    /* Fail the test instead of dereferencing a NULL allocation. */
    TEST_ASSERT_EQUAL(1, fd != NULL);

    uint32_t file_id = unordered_list_add(file_index, fd);

    fd->file_name = strdup(data_file_name);
    fd->num_intervals = 0;
    fd->mean_interval_size = 0;

    int chrm_len = 10;
    char *chrm = malloc(chrm_len*sizeof(char));
    TEST_ASSERT_EQUAL(1, chrm != NULL);
    uint32_t start, end;
    long offset;

    /* Accumulate the total interval length first; it is averaged below. */
    while (input_file_get_next_interval(in_f,
                &chrm,
                &chrm_len,
                &start,
                &end,
                &offset) >= 0) {
        fd->mean_interval_size += end-start;
        fd->num_intervals += 1;
    }

    /* An empty scan would make the division below meaningless. */
    TEST_ASSERT_EQUAL(1, fd->num_intervals > 0);

    fd->mean_interval_size = fd->mean_interval_size/fd->num_intervals;
    input_file_destroy(&in_f);
    free(chrm);

    char *out_file_name = "test_file_data_read_write.tmp";

    FILE *f = fopen(out_file_name, "wb");
    TEST_ASSERT_EQUAL(1, f != NULL);
    unordered_list_store(file_index, f, out_file_name, file_data_store);
    /* fclose flushes buffered output, so a failure here means a bad file. */
    TEST_ASSERT_EQUAL(0, fclose(f));

    f = fopen(out_file_name, "rb");
    TEST_ASSERT_EQUAL(1, f != NULL);
    struct unordered_list *file_index_r =
        unordered_list_load(f,
                            out_file_name,
                            file_data_load);
    /*
     * The read stream was previously leaked.
     * NOTE(review): assumes unordered_list_load() leaves the stream open
     * for the caller, mirroring the store path above -- confirm.
     */
    fclose(f);

    struct file_data *fd_r = (struct file_data *)
            unordered_list_get(file_index_r, file_id);

    TEST_ASSERT_EQUAL(0, strcmp(fd->file_name, fd_r->file_name));
    TEST_ASSERT_EQUAL(fd->num_intervals, fd_r->num_intervals);
    TEST_ASSERT_EQUAL(fd->mean_interval_size, fd_r->mean_interval_size);

    unordered_list_destroy(&file_index, file_data_free);
    unordered_list_destroy(&file_index_r, file_data_free);

    remove(out_file_name);
}
Exemple #5
0
int main(int argc, char **argv)
{
    WAH_SIZE = 32;
    WAH_MAX_FILL_WORDS = (1<<(WAH_SIZE-1)) - 1;

    uint32_t num_chrms = 100;

    if ((argc != 4)) {
        errx(1,
             "usage:\t%s <input file> <index dir> <w|i>",
             argv[0]);
    }

    double genome_size =  3095677412.0;

    char *input_file = argv[1];
    char *index_dir = argv[2];
    char *i_type = argv[3];

    struct input_file *in_f = input_file_init(input_file);

    int chrm_len = 50;
    char *chrm = (char *)malloc(chrm_len*sizeof(char));
    uint32_t start, end;
    long offset;

    struct giggle_index *gi;

    gi = giggle_load(index_dir,
                     uint32_t_ll_giggle_set_data_handler);

    uint32_t *file_counts = (uint32_t *)
            calloc(gi->file_index->num, sizeof(uint32_t));

    uint32_t num_intervals = 0;
    double mean_interval_size = 0.0;
    while ( in_f->input_file_get_next_interval(in_f, 
                                               &chrm,
                                               &chrm_len,
                                               &start,
                                               &end,
                                               &offset) >= 0 ) {
        num_intervals += 1;
        mean_interval_size += end - start;

        struct uint32_t_ll *R =
                (struct uint32_t_ll *)giggle_query_region(gi,
                                                          chrm,
                                                          start,
                                                          end);
        if (R != NULL) {
            struct uint32_t_ll_node *curr = R->head;

            while (curr != NULL) {
                /*
                struct file_id_offset_pair *fid_off = 
                    (struct file_id_offset_pair *)
                    unordered_list_get(gi->offset_index, curr->val);
                */
                struct file_id_offset_pair fid_off = 
                    gi->offset_index->vals[curr->val];
                struct file_data *fd = 
                    (struct file_data *)
                    unordered_list_get(gi->file_index, fid_off.file_id);

                file_counts[fid_off.file_id] += 1;

                curr = curr->next;
            }
            uint32_t_ll_free((void **)&R);
        }
    }

    mean_interval_size = mean_interval_size/num_intervals;

    struct doubles_uint32_t_tuple *sig = (struct doubles_uint32_t_tuple *)
        calloc(gi->file_index->num, sizeof(struct doubles_uint32_t_tuple));

    uint32_t i;
    for (i = 0; i < gi->file_index->num; ++i) {
        struct file_data *fd = 
            (struct file_data *)
            unordered_list_get(gi->file_index, i);

        long long n11 = (long long)(file_counts[i]);
        long long n12 = (long long)(MAX(0,num_intervals - file_counts[i]));
        long long n21 = (long long)(MAX(0,fd->num_intervals - file_counts[i]));
        double comp_mean = ((fd->mean_interval_size+mean_interval_size));
        long long n22_full = (long long)
            MAX(n11 + n12 + n21, genome_size/comp_mean);
        long long n22 = MAX(0, n22_full - (n11 + n12 + n21));
        double left, right, two;
        double r = kt_fisher_exact(n11, n12, n21, n22, &left, &right, &two);

        double ratio = (((double)n11/(double)n12) / ((double)n21/(double)n22));

        //fprintf(stderr, "%s\t%f\n", fd->file_name, two);
        sig[i].d1 = right;
        sig[i].d2 = ratio;
        sig[i].u1 = i;
        sig[i].u2 = file_counts[i];
    }

    qsort(sig,
          gi->file_index->num,
          sizeof(struct doubles_uint32_t_tuple), 
          doubles_uint32_t_tuple_cmp);

    for (i = 0; i < gi->file_index->num; ++i) {
        struct file_data *fd = 
            (struct file_data *)
            unordered_list_get(gi->file_index, sig[i].u1);
        /*
        printf("%s\t"
               "right:%f\t"
               "%f\n", fd->file_name, sig[i].d1, sig[i].d2);
        */
        printf( "sig:%f\t"
                "size:%u\t"
                "overlap:%u\t"
                "ratio:%f\t"
                "%s\n",
                sig[i].d1,
                fd->num_intervals,
                sig[i].u2,
                sig[i].d2,
                fd->file_name);
    }

    giggle_index_destroy(&gi);
    cache.destroy();
}