示例#1
0
//{{{void test_init_file(void)
/*
 * Opens the sorted 1k BED fixture and immediately destroys the handle.
 * The test passes when input_file_destroy() has reset the caller's
 * pointer to NULL.
 */
void test_init_file(void)
{
    struct input_file *bed = input_file_init("../data/1k.sort.bed.gz");

    input_file_destroy(&bed);
    TEST_ASSERT_EQUAL(NULL, bed);
}
示例#2
0
//{{{void test_bed_file_read(void)
/*
 * Reads ../data/1k.unsort.bed.gz record by record with
 * input_file_get_next_interval() and checks:
 *   - the chromosome, start and end of the first ten records against
 *     hard-coded expected values;
 *   - the values left in the output variables once the reader stops
 *     returning >= 0 (expected to be the final record of the file);
 *   - the total number of records read (1000).
 */
void test_bed_file_read(void)
{

    struct input_file *i = input_file_init("../data/1k.unsort.bed.gz");

    /*
     * chrm and chrm_len are passed by address to the reader.
     * NOTE(review): presumably so the reader can grow the buffer -- confirm.
     */
    int chrm_len = 10;
    char *chrm = (char *)malloc(chrm_len*sizeof(char));
    uint32_t start, end;
    long offset;


    char *chrm_A[10] = {"chr11",
        "chr11",  
        "chr10",  
        "chr16",  
        "chr15",  
        "chr19",  
        "chr19",  
        "chr18",  
        "chr21",  
        "chr7"};

    uint32_t start_A[10] = {64691252,
        129871988,
        74031859, 
        3070038,
        93346819,  
        4374094,
        42805980,
        3602738,
        9825304,
        2393484};


    uint32_t end_A[10] = {64692359,
        129873775,
        74037598,
        3072761,
        93347932,
        4376369,
        42807400,
        3605403,
        9827741,
        2394629};

    int j;
    for (j = 0; j < 10; ++j) {
        int ret = input_file_get_next_interval(i,
                &chrm,
                &chrm_len,
                &start,
                &end,
                &offset);
        /*
         * Fail here rather than compare against stale output variables.
         * ">= 0" is the same success criterion the drain loop below uses.
         */
        TEST_ASSERT_EQUAL(1, ret >= 0);
        TEST_ASSERT_EQUAL(0,strcmp(chrm_A[j], chrm));
        TEST_ASSERT_EQUAL(start_A[j], start);
        TEST_ASSERT_EQUAL(end_A[j], end);
    }

    /* j is 10 here; keep counting so it ends up as the total record count. */
    while (input_file_get_next_interval(i,
                &chrm,
                &chrm_len,
                &start,
                &end,
                &offset) >= 0) {
        ++j;
    }

    /* Output variables still hold whatever the last read left in them. */
    TEST_ASSERT_EQUAL(0,strcmp("chr1", chrm));
    TEST_ASSERT_EQUAL(25895359, start);
    TEST_ASSERT_EQUAL(25896171, end);

    TEST_ASSERT_EQUAL(1000, j);

    input_file_destroy(&i);
    /* The chromosome buffer is owned by this test; release it. */
    free(chrm);
}
示例#3
0
/*
 * Round-trip test for per-file statistics.
 *
 * Builds a file_data record for ../data/1k.unsort.bed.gz (file name,
 * interval count, mean interval size computed by scanning the file),
 * stores the one-element unordered_list holding it to a temporary file
 * with file_data_store, loads it back with file_data_load, and checks
 * that the three fields survive the round trip.  The temporary file is
 * removed at the end.
 */
void test_get_file_stats(void)
{

    struct input_file *i = input_file_init("../data/1k.unsort.bed.gz");
    struct unordered_list *file_index = unordered_list_init(1);


    struct file_data *fd = (struct file_data *)
        calloc(1, sizeof(struct file_data));

    uint32_t file_id = unordered_list_add(file_index, fd);

    fd->file_name = strdup("../data/1k.unsort.bed.gz");
    fd->num_intervals = 0;
    fd->mean_interval_size = 0;

    int chrm_len = 10;
    char *chrm = (char *)malloc(chrm_len*sizeof(char));
    uint32_t start, end;
    long offset;

    /* Accumulate the total interval length, then divide once at the end. */
    while (input_file_get_next_interval(i,
                &chrm,
                &chrm_len,
                &start,
                &end,
                &offset) >= 0) {
        fd->mean_interval_size += end-start;
        fd->num_intervals += 1;
    }

    fd->mean_interval_size = fd->mean_interval_size/fd->num_intervals;
    input_file_destroy(&i);
    free(chrm);

    char *out_file_name = "test_file_data_read_write.tmp";

    FILE *f = fopen(out_file_name, "wb");
    /* Do not hand a NULL stream to the store routine. */
    TEST_ASSERT_EQUAL(1, f != NULL);
    unordered_list_store(file_index, f, out_file_name, file_data_store);
    fclose(f);

    f = fopen(out_file_name, "rb");
    TEST_ASSERT_EQUAL(1, f != NULL);
    struct unordered_list *file_index_r = 
        unordered_list_load(f,
                            out_file_name,
                            file_data_load);
    /*
     * NOTE(review): assumes unordered_list_load() leaves the stream open,
     * mirroring the store path above where the caller closes it -- confirm.
     */
    fclose(f);

    struct file_data *fd_r = (struct file_data *)
            unordered_list_get(file_index_r, file_id);

    TEST_ASSERT_EQUAL(0, strcmp(fd->file_name, fd_r->file_name));
    TEST_ASSERT_EQUAL(fd->num_intervals, fd_r->num_intervals);
    TEST_ASSERT_EQUAL(fd->mean_interval_size, fd_r->mean_interval_size);

    unordered_list_destroy(&file_index, file_data_free);
    unordered_list_destroy(&file_index_r, file_data_free);

    remove(out_file_name);
}
示例#4
0
int main(int argc, char **argv)
{
    WAH_SIZE = 32;
    WAH_MAX_FILL_WORDS = (1<<(WAH_SIZE-1)) - 1;

    uint32_t num_chrms = 100;

    if ((argc != 4)) {
        errx(1,
             "usage:\t%s <input file> <index dir> <w|i>",
             argv[0]);
    }

    double genome_size =  3095677412.0;

    char *input_file = argv[1];
    char *index_dir = argv[2];
    char *i_type = argv[3];

    struct input_file *in_f = input_file_init(input_file);

    int chrm_len = 50;
    char *chrm = (char *)malloc(chrm_len*sizeof(char));
    uint32_t start, end;
    long offset;

    struct giggle_index *gi;

    gi = giggle_load(index_dir,
                     uint32_t_ll_giggle_set_data_handler);

    uint32_t *file_counts = (uint32_t *)
            calloc(gi->file_index->num, sizeof(uint32_t));

    uint32_t num_intervals = 0;
    double mean_interval_size = 0.0;
    while ( in_f->input_file_get_next_interval(in_f, 
                                               &chrm,
                                               &chrm_len,
                                               &start,
                                               &end,
                                               &offset) >= 0 ) {
        num_intervals += 1;
        mean_interval_size += end - start;

        struct uint32_t_ll *R =
                (struct uint32_t_ll *)giggle_query_region(gi,
                                                          chrm,
                                                          start,
                                                          end);
        if (R != NULL) {
            struct uint32_t_ll_node *curr = R->head;

            while (curr != NULL) {
                /*
                struct file_id_offset_pair *fid_off = 
                    (struct file_id_offset_pair *)
                    unordered_list_get(gi->offset_index, curr->val);
                */
                struct file_id_offset_pair fid_off = 
                    gi->offset_index->vals[curr->val];
                struct file_data *fd = 
                    (struct file_data *)
                    unordered_list_get(gi->file_index, fid_off.file_id);

                file_counts[fid_off.file_id] += 1;

                curr = curr->next;
            }
            uint32_t_ll_free((void **)&R);
        }
    }

    mean_interval_size = mean_interval_size/num_intervals;

    struct doubles_uint32_t_tuple *sig = (struct doubles_uint32_t_tuple *)
        calloc(gi->file_index->num, sizeof(struct doubles_uint32_t_tuple));

    uint32_t i;
    for (i = 0; i < gi->file_index->num; ++i) {
        struct file_data *fd = 
            (struct file_data *)
            unordered_list_get(gi->file_index, i);

        long long n11 = (long long)(file_counts[i]);
        long long n12 = (long long)(MAX(0,num_intervals - file_counts[i]));
        long long n21 = (long long)(MAX(0,fd->num_intervals - file_counts[i]));
        double comp_mean = ((fd->mean_interval_size+mean_interval_size));
        long long n22_full = (long long)
            MAX(n11 + n12 + n21, genome_size/comp_mean);
        long long n22 = MAX(0, n22_full - (n11 + n12 + n21));
        double left, right, two;
        double r = kt_fisher_exact(n11, n12, n21, n22, &left, &right, &two);

        double ratio = (((double)n11/(double)n12) / ((double)n21/(double)n22));

        //fprintf(stderr, "%s\t%f\n", fd->file_name, two);
        sig[i].d1 = right;
        sig[i].d2 = ratio;
        sig[i].u1 = i;
        sig[i].u2 = file_counts[i];
    }

    qsort(sig,
          gi->file_index->num,
          sizeof(struct doubles_uint32_t_tuple), 
          doubles_uint32_t_tuple_cmp);

    for (i = 0; i < gi->file_index->num; ++i) {
        struct file_data *fd = 
            (struct file_data *)
            unordered_list_get(gi->file_index, sig[i].u1);
        /*
        printf("%s\t"
               "right:%f\t"
               "%f\n", fd->file_name, sig[i].d1, sig[i].d2);
        */
        printf( "sig:%f\t"
                "size:%u\t"
                "overlap:%u\t"
                "ratio:%f\t"
                "%s\n",
                sig[i].d1,
                fd->num_intervals,
                sig[i].u2,
                sig[i].d2,
                fd->file_name);
    }

    giggle_index_destroy(&gi);
    cache.destroy();
}