示例#1
0
文件: wah.c 项目: brentp/giggle
//{{{uint64_t wah_non_leading_serialize(void *deserialized,
/*
 * Pack a struct wah_bpt_non_leading_data into a single malloc'ed buffer.
 *
 * Buffer layout:
 *   [uint32_t SA_len][uint32_t SE_len][SA bytes][SE bytes]
 * where SA_len / SE_len are the byte sizes of the SA / SE bitmaps
 * (their uint32_t length prefix plus WAH_LEN words), or 0 when the
 * corresponding pointer is NULL.
 *
 * deserialized: pointer to a struct wah_bpt_non_leading_data, or NULL.
 * serialized:   out parameter; receives the new buffer, or NULL when
 *               deserialized is NULL.
 *
 * Returns the number of bytes stored in *serialized (0 for NULL input).
 * Exits through errx() if the buffer cannot be allocated or if the
 * written length does not match the computed length.
 */
uint64_t wah_non_leading_serialize(void *deserialized,
                                     void **serialized)
{
    if (deserialized == NULL) {
        *serialized = NULL;
        return 0;
    }

    struct wah_bpt_non_leading_data *d =
            (struct wah_bpt_non_leading_data *)deserialized;

    uint32_t SA_len = 0, SE_len = 0, serialized_len;

    // Each bitmap is stored as its uint32_t length prefix followed by
    // WAH_LEN words of WAH_SIZE/BYTE bytes each.
    if (d->SA != NULL)
        SA_len = sizeof(uint32_t) + 
            WAH_LEN(d->SA)*(WAH_SIZE/BYTE)*sizeof(uint8_t);

    if (d->SE != NULL)
        SE_len = sizeof(uint32_t) + 
            WAH_LEN(d->SE)*(WAH_SIZE/BYTE)*sizeof(uint8_t);

    // Two uint32_t header fields precede the bitmap payloads.
    serialized_len = 2*sizeof(uint32_t) + SA_len + SE_len;

    uint8_t *data = (uint8_t *)malloc(serialized_len);
    if (data == NULL)
        errx(1,
             "wah_non_leading_serialize: could not allocate %u bytes.",
             serialized_len);

    uint32_t *data_u = (uint32_t *)data;
    data_u[0] = SA_len;
    data_u[1] = SE_len;

    uint32_t data_i = 2*sizeof(uint32_t);

    if (d->SA != NULL) 
        memcpy(data + data_i, d->SA, SA_len);

    data_i += SA_len;

    if (d->SE != NULL) 
        memcpy(data + data_i, d->SE, SE_len);

    data_i += SE_len;

    // Sanity check: the write cursor must land exactly at the end.
    if (data_i != serialized_len)
        errx(1,
             "Issue with wah_non_leading_serlize lengths. "
             "Expected:%u observed:%u.",
             serialized_len,
             data_i);

    *serialized = data;

    return serialized_len; 
}
示例#2
0
文件: wah.c 项目: brentp/giggle
//{{{uint8_t *wah_copy(uint8_t *w)
/*
 * Duplicate a WAH bitmap into a freshly malloc'ed buffer.
 *
 * w: bitmap to copy (uint32_t length prefix followed by WAH_LEN(w) words
 *    of WAH_SIZE/BYTE bytes each), or NULL.
 *
 * Returns the copy, or NULL when w is NULL or WAH_LEN(w) is 0.
 * Exits through errx() if the copy cannot be allocated; NULL is not used
 * for that case because it already means "nothing to copy".
 */
uint8_t *wah_copy(uint8_t *w)
{
    if (w == NULL)
        return NULL;

    if (WAH_LEN(w) == 0)
        return NULL;

    // Length prefix plus the packed words.
    uint32_t R_size = sizeof(uint32_t) + 
            (WAH_LEN(w) * (WAH_SIZE/BYTE) * sizeof(uint8_t));

    uint8_t *R = (uint8_t *)malloc(R_size);
    if (R == NULL)
        errx(1, "wah_copy: could not allocate %u bytes.", R_size);

    memcpy(R, w, R_size);

    return R;
}
示例#3
0
文件: wah.c 项目: brentp/giggle
//{{{ uint32_t wah_get_ints(uint8_t *X, uint32_t **R)
/*
 * Expand a WAH bitmap into the list of positions of its set bits.
 *
 * X: bitmap to decode; WAH_LEN(X) words are read with get_wah_i().
 * R: out parameter; receives a calloc'ed array holding one uint32_t per
 *    set bit found in words for which WAH_NUM_WORDS(word, WAH_SIZE) == 1.
 *    Note that calloc(0, ...) may legitimately yield NULL when no bits
 *    are set.
 *
 * Each reported value is the bit's offset inside its word (counted from
 * the most significant bit of the WAH_SIZE-bit word) plus
 * (WAH_SIZE - 1) times the number of words covered by everything that
 * preceded it.
 *
 * Returns the number of entries written to *R.
 * Exits through errx() if a non-empty result array cannot be allocated.
 */
uint32_t wah_get_ints(uint8_t *X, uint32_t **R)
{
    uint32_t x;
    uint32_t x_i_size, x_size;
    uint32_t X_len = WAH_LEN(X);
    uint32_t R_len = 0;
    uint32_t i;

    // Pass 1: count the set bits so the output can be sized exactly.
    for (i = 0; i < X_len; ++i) {
        x = 0;
        get_wah_i(X, &x, WAH_SIZE, i);
        x_i_size = WAH_NUM_WORDS(x, WAH_SIZE);
        if (x_i_size == 1)
            R_len +=  __builtin_popcount(x);
    }

    //__builtin_clz(x) takes in a unsigned int, so on smaller
    //types it will count extra zeros, diff counts how many extra there are 
    uint32_t diff = ((sizeof(unsigned int)*BYTE)/WAH_SIZE - 1)*WAH_SIZE;
    uint32_t offset = 0;

    *R = (uint32_t*)calloc(R_len, sizeof(uint32_t));
    if ((*R == NULL) && (R_len > 0))
        errx(1, "wah_get_ints: could not allocate %u entries.", R_len);

    // Pass 2: emit one position per set bit, most significant bit first.
    uint32_t R_i = 0;
    x_size = 0;
    for (i = 0; i < X_len; ++i) {
        x = 0;
        get_wah_i(X, &x, WAH_SIZE, i);
        x_i_size = WAH_NUM_WORDS(x, WAH_SIZE);
        if ( x_i_size == 1 ) {
            while (x != 0) {
                offset = __builtin_clz(x) - diff;
                (*R)[R_i] = offset + x_size;
                R_i += 1;
                // Clear the bit just reported; unsigned literal keeps the
                // shift well-defined for every bit position.
                x &= ~(1u << (WAH_SIZE-1-offset));
            }
        }
        // Advance the running bit position past this word (or run).
        x_size += x_i_size * (WAH_SIZE - 1);
    }

    return R_len;
}
示例#4
0
文件: wah.c 项目: brentp/giggle
//{{{uint8_t *wah_init(uint32_t val)
/*
 * Build a new WAH bitmap with a single bit set for position val.
 *
 * The bitmap is a run of fill words (flag bit 1 << (WAH_SIZE - 1) plus a
 * word count of at most WAH_MAX_FILL_WORDS each) that skip the words
 * before val, followed by one final word. The final word has bit
 * (WAH_SIZE - 1 - remaining_val) set, or is 0 when val is 0.
 *
 * val: position to set.
 *
 * Returns a malloc'ed bitmap whose WAH_LEN is the number of words that
 * were actually written. Exits through errx() on allocation failure.
 */
uint8_t *wah_init(uint32_t val)
{
    uint32_t bits_per_word = WAH_SIZE - 1;
    uint32_t num_words = (val + bits_per_word - 1) / bits_per_word;
    // the max number of words 8-bit fill word and represent is 
    // 2**7 - 1 = 127
    // LEN, and WAH_LEN is the number of words, it is independent of word size
    //
    // max_len is an upper bound on the words needed: the loop below emits
    // ceil((num_words - 1) / WAH_MAX_FILL_WORDS) fill words, which never
    // exceeds the ceil(num_words / WAH_MAX_FILL_WORDS) reserved here.
    uint32_t max_len = 1 + (num_words > 1 ? 
            (num_words + WAH_MAX_FILL_WORDS - 1)/WAH_MAX_FILL_WORDS : 0);
    uint8_t *w = (uint8_t *)malloc(sizeof(uint32_t) + 
                    (max_len * (WAH_SIZE/BYTE)  * sizeof(uint8_t)));
    if (w == NULL)
        errx(1, "wah_init: could not allocate %u words.", max_len);

    uint32_t v, i = 0;
    uint32_t saved_words;

    // Emit fill words until the target bit falls inside a single word.
    while (val > bits_per_word) {
        saved_words = MIN(num_words - 1, WAH_MAX_FILL_WORDS);
        v = (1u << (bits_per_word)) + (saved_words);
        set_wah_i(w, &v, WAH_SIZE, i);
        val -= saved_words * bits_per_word;
        num_words -= saved_words;
        i+=1;
    }

    // Final word: the single set bit, or an empty word when val is 0.
    if (val > 0)
        v = 1u << ( bits_per_word - val);
    else
        v = 0;
    set_wah_i(w, &v, WAH_SIZE, i);

    // Report exactly the words written. The reserved upper bound can be
    // one larger, and advertising it would expose an uninitialized word.
    WAH_LEN(w) = i + 1;

    return w;
}
示例#5
0
文件: wah.c 项目: brentp/giggle
//{{{ uint32_t wah_get_ints_count(uint8_t *X)
/*
 * Count the set bits stored in a WAH bitmap.
 *
 * X: bitmap to inspect; WAH_LEN(X) words are read with get_wah_i().
 *
 * Only words for which WAH_NUM_WORDS(word, WAH_SIZE) == 1 contribute;
 * their set bits are counted with __builtin_popcount().
 *
 * Returns the total number of set bits found.
 */
uint32_t wah_get_ints_count(uint8_t *X)
{
    // get_wah_i() is handed a uint32_t * by wah_get_ints() and wah_or();
    // use the same type here so the word is neither truncated nor written
    // through a pointer to a smaller object.
    uint32_t x;
    uint32_t x_i_size;
    uint32_t X_len = WAH_LEN(X);
    uint32_t R_len = 0;

    uint32_t i;
    for (i = 0; i < X_len; ++i) {
        // Zero first, matching the other get_wah_i() call sites.
        x = 0;
        get_wah_i(X, &x, WAH_SIZE, i);

        x_i_size = WAH_NUM_WORDS(x, WAH_SIZE);

        if (x_i_size == 1)
            R_len +=  __builtin_popcount(x);
    }

    return R_len;
}
示例#6
0
文件: test.c 项目: theboocock/giggle
/*
 * Test driver: expects exactly three arguments
 *   <index dir> <region> <w|i>
 * In the code that is currently compiled it loads the giggle index found in
 * <index dir> with the uint32_t_ll data handler, then destroys the index and
 * the cache. The region and index-type arguments are only consumed by the
 * `#if 0` section below, which is excluded from compilation.
 */
int main(int argc, char **argv)
{
    uint32_t num_chrms = 100;

    // Wrong argument count: print usage and exit with status 1.
    if ((argc != 4)) {
        errx(1,
             "usage:\t%s <index dir> <region> <w|i>",
             argv[0]);
    }

    char *index_dir = argv[1];
    char *region_s = argv[2];
    char *i_type = argv[3];

    struct giggle_index *gi;
    gi = giggle_load(index_dir,
                     uint32_t_ll_giggle_set_data_handler);


    // Disabled experiment: parse "chrm:start-end" from the region argument,
    // then either run a region query ('i') or inspect the WAH data reached
    // from the leaf located by bpt_find (any other index type).
#if 0
    char *chrm = region_s;
    uint32_t start = 0, end = 0;
    uint32_t i, len = strlen(region_s);
    
    // Split the region string in place at ':' and '-'.
    for (i = 0; i < len; ++i) {
        if (region_s[i] == ':') {
            region_s[i] = '\0';
            start = atoi(region_s + i + 1);
        } else if (region_s[i] == '-') {
            region_s[i] = '\0';
            end = atoi(region_s + i + 1);
            break;
        }
    }

    struct giggle_index *gi;
    if (i_type[0] == 'i') {
        gi = giggle_load(index_dir,
                         uint32_t_ll_giggle_set_data_handler);

        struct uint32_t_ll *R =
                (struct uint32_t_ll *)giggle_query_region(gi,
                                                          chrm,
                                                          start,
                                                          end);

        if (R != NULL)
            printf("Hits:%u\n", R->len);
        else
            printf("Hits:0\n");

    } else {
        gi = giggle_load(index_dir,
                         wah_giggle_set_data_handler);

        uint32_t chr_id = giggle_get_chrm_id(gi, chrm);
        //return giggle_search(chr_id, gi->root_ids[chr_id], start, end);
        
        uint32_t domain = chr_id;
        uint32_t root_id = gi->root_ids[chr_id];

        uint32_t leaf_start_id;
        int pos_start_id;

        uint32_t nld_start_id = bpt_find(domain,
                                         root_id,
                                         &leaf_start_id,
                                         &pos_start_id,
                                         start);
        fprintf(stderr,
                "nld_start_id:%u\t"
                "leaf_start_id:%u\t"
                "pos_start_id:%u\n",
                nld_start_id,
                leaf_start_id,
                pos_start_id);

        // Cache lookups here use (id - 1) as the key.
        struct bpt_node *leaf_start = cache.get(domain,
                                                leaf_start_id - 1,
                                                &bpt_node_cache_handler);
        bpt_print_node(leaf_start);

        
        struct wah_bpt_non_leading_data *nld = 
                cache.get(domain,
                          BPT_POINTERS(leaf_start)[0] - 1,
                          &wah_non_leading_cache_handler);

        fprintf(stderr,
                "WAH_LEN:%u\t"
                "wah_get_ints_count:%u\t"
                "\n",
                WAH_LEN(nld->SA),
                wah_get_ints_count(nld->SA));
            
        // Dump every position decoded from the SA bitmap.
        uint32_t *R = NULL;
        uint32_t R_len = wah_get_ints(nld->SA, &R);

        uint32_t i;
        for (i = 0; i < R_len; ++i) {
            fprintf(stderr, "%u:%u\n", i, R[i]);
        }

        /*
        uint8_t *R = (uint8_t *)giggle_query_region(gi,
                                                    chrm,
                                                    start,
                                                    end);
        if (R != NULL)
            printf("Hits:%u\n", wah_get_ints_count(R));
        else
            printf("Hits:0\n");
        */

    }
#endif
    // Tear down the index and the cache before returning.
    giggle_index_destroy(&gi);
    cache.destroy();
}
示例#7
0
文件: wah.c 项目: brentp/giggle
//{{{ uint32_t wah_or(uint8_t *X, uint8_t *Y, uint8_t **R, uint32_t *R_size)
/*
 * Compute the bitwise OR of two WAH bitmaps.
 *
 * X, Y:   input bitmaps, read word by word with get_wah_i().
 * R:      in/out result buffer. When *R is NULL, or *R_size is too small
 *         to hold WAH_LEN(X) + WAH_LEN(Y) words plus the length prefix,
 *         a new zeroed buffer is allocated (an old one is freed).
 * R_size: in/out size in bytes of *R; updated whenever *R is reallocated.
 *
 * At every step the shorter of the two current runs decides how many
 * words are emitted: a run longer than one word becomes a fill word
 * (flag bit plus run length), otherwise the two word values are ORed.
 * Once one input is exhausted it is treated as all zero and the other
 * input drives the remaining output.
 *
 * Returns 1 when *R was allocated or resized by this call (it is then
 * shrunk to the exact result size), 0 when the caller's buffer was reused
 * unchanged in size. Exits through errx() on allocation failure.
 */
uint32_t wah_or(uint8_t *X, uint8_t *Y, uint8_t **R, uint32_t *R_size)
{
    uint32_t R_i = 0, X_i = 0, Y_i = 0;
    // Zeroed up front like every other get_wah_i() call site, so bytes
    // that get_wah_i() may leave untouched are not indeterminate.
    uint32_t x = 0, y = 0;
    uint32_t x_size, y_size, r_size, y_done = 0, x_done = 0;
    uint32_t X_len = WAH_LEN(X), Y_len = WAH_LEN(Y);
    uint32_t R_len = X_len + Y_len;
    uint32_t reset_R = 0;
    uint8_t *tmp;

    // Bytes needed for the length prefix plus R_len result words.
    size_t needed = sizeof(uint32_t) + (R_len*(WAH_SIZE/BYTE)*sizeof(uint8_t));

    if ((*R == NULL) || (*R_size < needed)) {
        free(*R); // no-op when *R is NULL
        *R_size = needed;
        *R = (uint8_t *)malloc(*R_size);
        if (*R == NULL)
            errx(1, "wah_or: could not allocate %u bytes.", *R_size);
        memset(*R, 0, *R_size);
        reset_R = 1;
    }

    get_wah_i(X, &x, WAH_SIZE, X_i);
    get_wah_i(Y, &y, WAH_SIZE, Y_i);

    x_size = WAH_NUM_WORDS(x, WAH_SIZE);
    y_size = WAH_NUM_WORDS(y, WAH_SIZE);

    uint32_t v;
    while (1) {
        r_size = MIN(x_size, y_size);

        if (r_size > 1)  {
            // Both inputs are inside a run: emit a fill word of r_size.
            v = ((1u << (WAH_SIZE - 1)) + r_size);
        } else {
            v = WAH_VAL(x, WAH_SIZE) | WAH_VAL(y, WAH_SIZE);
        }

        // Grow R if we need to
        if (sizeof(uint32_t) + R_i*(WAH_SIZE/BYTE)*sizeof(uint8_t) == *R_size) {
            uint32_t old_len = R_len;
            reset_R = 1;
            R_len = R_len * 2;
            *R_size = sizeof(uint32_t) + 
                    (R_len*(WAH_SIZE/BYTE)*sizeof(uint8_t));
            tmp = (uint8_t *) realloc(*R, *R_size);
            if (tmp == NULL)
                errx(1, "wah_or: could not grow result to %u bytes.", *R_size);
            *R = tmp;
            // Zero the newly added half.
            memset(*R + sizeof(uint32_t) + 
                    (old_len*(WAH_SIZE/BYTE)*sizeof(uint8_t)),
                   0,
                   old_len*(WAH_SIZE/BYTE)*sizeof(uint8_t) );
        }

        set_wah_i(*R, &v, WAH_SIZE, R_i);
        R_i += 1;

        x_size -= r_size;
        y_size -= r_size;

        // Fetch the next X word once the current one is consumed.
        if ((x_size == 0) && (x_done == 0)) {
            X_i += 1;
            if (X_i == X_len) {
                x_done = 1;
                x = 0;
            } else {
                x = 0;
                get_wah_i(X, &x, WAH_SIZE, X_i);
                x_size = WAH_NUM_WORDS(x, WAH_SIZE);
            }
        }

        // Fetch the next Y word once the current one is consumed.
        if ((y_size == 0) && (y_done == 0)) {
            Y_i += 1;
            if (Y_i == Y_len) {
                y_done = 1;
                y = 0;
            } else {
                y = 0;
                get_wah_i(Y, &y, WAH_SIZE, Y_i);
                y_size = WAH_NUM_WORDS(y, WAH_SIZE);
            }
        }

        // An exhausted input mirrors the other's run length so that the
        // remaining input alone determines r_size.
        if ((x_done == 1) && (y_done == 1))
            break;
        else if (x_done == 1)
            x_size = y_size;
        else if (y_done == 1)
            y_size = x_size;
    }

    R_len = R_i;
    WAH_LEN(*R) = R_len;
    if (reset_R == 1) {
        // The buffer is ours: trim it to the words actually produced.
        *R_size = sizeof(uint32_t) + (R_len*(WAH_SIZE/BYTE)*sizeof(uint8_t));
        tmp = (uint8_t *)realloc(*R, *R_size);
        if (tmp == NULL)
            errx(1, "wah_or: could not resize result to %u bytes.", *R_size);
        *R = tmp;
    }

    return reset_R;
}
示例#8
0
文件: wah.c 项目: brentp/giggle
//{{{uint8_t *uints_to_wah(uint32_t *D, uint32_t D_num)
/*
 * Encode an array of integers as a WAH bitmap.
 *
 * D:     input values; the encoding loop assumes they are sorted in
 *        increasing order.
 * D_num: number of entries in D.
 *
 * Values that fall inside the word currently being built are ORed into
 * it as bit (bits_per_word - distance). A value beyond the current word
 * flushes that word, emits as many fill words as needed (each covering at
 * most WAH_MAX_FILL_WORDS words) and starts a new word.
 *
 * Returns a malloc'ed bitmap trimmed to the words produced; WAH_LEN is
 * set to that count. With D_num == 0 the result is one word holding 0.
 * Exits through errx() on allocation failure.
 */
uint8_t *uints_to_wah(uint32_t *D, uint32_t D_num)
{
    uint32_t bits_per_word = WAH_SIZE - 1;
    uint32_t curr_word = 0, // num of words previously considered
             curr_val = 0, // value at the current index
             word_i = 0, // index into the array of words
             fill_size;
    uint32_t val, i;
    uint8_t *tmp;

    // Initial capacity in words; doubled on demand below.
    uint32_t w_num = D_num*2;
    uint8_t *w = (uint8_t *)malloc(sizeof(uint32_t) + 
                                   (w_num * (WAH_SIZE/BYTE) * sizeof(uint8_t)));
    if (w == NULL)
        errx(1, "uints_to_wah: could not allocate %u words.", w_num);

    // loop over the sorted input
    for (i = 0 ; i < D_num; ++i) {
        // get the distance from the current value and the first value in the
        // current word
        val = D[i] - (curr_word * bits_per_word);

        // will the val fit in the current word?
        if (val <= bits_per_word) {
            curr_val |= 1u << ( bits_per_word - val);
        } else  {
            if (curr_val > 0) {
                set_wah_i(w, &curr_val, WAH_SIZE, word_i);

                curr_word += 1; // move to the next word
                word_i += 1;
                curr_val = 0;

                // word_i is the next slot to be written, so it must stay
                // strictly below the capacity.
                if (word_i >= w_num) {
                    w_num *= 2;
                    tmp = (uint8_t *)realloc(w,
                                             sizeof(uint32_t) + 
                                             (w_num * 
                                             (WAH_SIZE/BYTE) * 
                                             sizeof(uint8_t)));
                    if (tmp == NULL)
                        errx(1,
                             "uints_to_wah: could not grow to %u words.",
                             w_num);
                    w = tmp;
                }

                val = D[i] - (curr_word * bits_per_word);
            }

            // Skip whole words with fill words until val fits in one word.
            uint32_t saved_words;
            while (val > bits_per_word) {
                fill_size = ((val + bits_per_word - 1) / bits_per_word) - 1;
                saved_words = MIN(fill_size, WAH_MAX_FILL_WORDS);
                curr_val = (1u << (bits_per_word)) + (saved_words);

                set_wah_i(w, &curr_val, WAH_SIZE, word_i);

                curr_word += saved_words; // move to the next word
                word_i += 1;
                curr_val = 0;
                if (word_i >= w_num) {
                    w_num *= 2;
                    tmp = (uint8_t *)realloc(w,
                                             sizeof(uint32_t) + 
                                             (w_num * 
                                             (WAH_SIZE/BYTE) * 
                                             sizeof(uint8_t)));
                    if (tmp == NULL)
                        errx(1,
                             "uints_to_wah: could not grow to %u words.",
                             w_num);
                    w = tmp;
                }
                
                val -= saved_words * bits_per_word;
            }

            if (val > 0) {
                curr_val = 1u << ( bits_per_word - val);
            } else {
                curr_val = 0;
            }
        }
    }

    // Resize to the exact final length first so that the last word is
    // always written inside the allocation (this also covers D_num == 0,
    // where the initial capacity is zero words).
    tmp = (uint8_t *)realloc(w,
                             sizeof(uint32_t) + 
                             ((word_i + 1) * 
                             (WAH_SIZE/BYTE) * 
                             sizeof(uint8_t)));
    if (tmp == NULL)
        errx(1, "uints_to_wah: could not resize to %u words.", word_i + 1);
    w = tmp;

    // Always store the last word, even when it is 0, so the word counted
    // by WAH_LEN is never left uninitialized.
    set_wah_i(w, &curr_val, WAH_SIZE, word_i);

    WAH_LEN(w) = word_i + 1;
    return w;
}