//{{{uint64_t wah_non_leading_serialize(void *deserialized, uint64_t wah_non_leading_serialize(void *deserialized, void **serialized) { if (deserialized == NULL) { *serialized = NULL; return 0; } struct wah_bpt_non_leading_data *d = (struct wah_bpt_non_leading_data *)deserialized; uint32_t SA_len = 0, SE_len = 0, serialized_len; if (d->SA != NULL) SA_len = sizeof(uint32_t) + WAH_LEN(d->SA)*(WAH_SIZE/BYTE)*sizeof(uint8_t); if (d->SE != NULL) SE_len = sizeof(uint32_t) + WAH_LEN(d->SE)*(WAH_SIZE/BYTE)*sizeof(uint8_t); serialized_len = 2*sizeof(uint32_t) + SA_len + SE_len; uint8_t *data = (uint8_t *)malloc(serialized_len); uint32_t *data_u = (uint32_t *)data; data_u[0] = SA_len; data_u[1] = SE_len; uint32_t data_i = 2*sizeof(uint32_t); if (d->SA != NULL) memcpy(data + data_i, d->SA, SA_len); data_i += SA_len; if (d->SE != NULL) memcpy(data + data_i, d->SE, SE_len); data_i += SE_len; if (data_i != serialized_len) errx(1, "Issue with wah_non_leading_serlize lengths. " "Expected:%u observed:%u.", serialized_len, data_i); *serialized = data; return serialized_len; }
//{{{uint8_t *wah_copy(uint8_t *w) uint8_t *wah_copy(uint8_t *w) { if (w == NULL) return NULL; if (WAH_LEN(w) == 0) return NULL; uint32_t R_size = sizeof(uint32_t) + (WAH_LEN(w) * (WAH_SIZE/BYTE) * sizeof(uint8_t)); uint8_t *R = (uint8_t *)malloc(R_size); memcpy(R, w, R_size); return R; }
//{{{ uint32_t wah_get_ints(uint8_t *X, uint32_t **R) uint32_t wah_get_ints(uint8_t *X, uint32_t **R) { //uint8_t x; uint32_t x; uint32_t x_i_size, x_size = 0; uint32_t X_len = WAH_LEN(X); uint32_t R_len = 0; uint32_t i; for (i = 0; i < X_len; ++i) { //x = WAH_I(X, WAH_SIZE, i); x = 0; get_wah_i(X, &x, WAH_SIZE, i); x_i_size = WAH_NUM_WORDS(x, WAH_SIZE); if (x_i_size == 1) R_len += __builtin_popcount(x); x_size += x_i_size * (WAH_SIZE - 1); } //__builtin_clz(x) takes in a unsigned int, so on smaller //types it will count extra zeros, diff counts how many extra there are uint32_t diff = ((sizeof(unsigned int)*BYTE)/WAH_SIZE - 1)*WAH_SIZE; uint32_t offset = 0; *R = (uint32_t*)calloc(R_len, sizeof(uint32_t)); uint32_t R_i = 0; x_size = 0; for (i = 0; i < X_len; ++i) { x = 0; get_wah_i(X, &x, WAH_SIZE, i); x_i_size = WAH_NUM_WORDS(x, WAH_SIZE); if ( x_i_size == 1 ) { while (x != 0) { offset = __builtin_clz(x) - diff; (*R)[R_i] = offset + x_size; R_i += 1; x &= ~(1 << (WAH_SIZE-1-offset)); } } x_size += x_i_size * (WAH_SIZE - 1); } return R_len; }
//{{{uint8_t *wah_init(uint32_t val) uint8_t *wah_init(uint32_t val) { uint32_t bits_per_word = WAH_SIZE - 1; uint32_t num_words = (val + bits_per_word - 1) / bits_per_word; // the max number of words 8-bit fill word and represent is // 2**7 - 1 = 127 // LEN, and WAH_LEN is the number of words, it is independent of word size uint32_t len = 1 + (num_words > 1 ? (num_words + WAH_MAX_FILL_WORDS - 1)/WAH_MAX_FILL_WORDS : 0); uint8_t *w = (uint8_t *)malloc(sizeof(uint32_t) + (len * (WAH_SIZE/BYTE) * sizeof(uint8_t))); WAH_LEN(w) = len; uint32_t v, i = 0; uint32_t saved_words; while (val > bits_per_word) { saved_words = MIN(num_words - 1, WAH_MAX_FILL_WORDS); //WAH_I(w, WAH_SIZE, i) = (1 << (bits_per_word-10)) | (saved_words); v = (1 << (bits_per_word)) + (saved_words); //fprintf(stderr, "%u\n", v); set_wah_i(w, &v, WAH_SIZE, i); val -= saved_words * bits_per_word; num_words -= saved_words; i+=1; } if (val > 0) { //WAH_I(w, WAH_SIZE, i) = 1 << ( bits_per_word - val); v = 1 << ( bits_per_word - val); //fprintf(stderr, "%u\n", v); set_wah_i(w, &v, WAH_SIZE, i); } else { //WAH_I(w, WAH_SIZE,i) = 0; v = 0; //fprintf(stderr, "%u\n", v); set_wah_i(w, &v, WAH_SIZE, i); } return w; }
//{{{ uint32_t wah_get_ints_count(uint8_t *X) uint32_t wah_get_ints_count(uint8_t *X) { uint8_t x; uint32_t x_i_size; uint32_t X_len = WAH_LEN(X); uint32_t R_len = 0; uint32_t i; for (i = 0; i < X_len; ++i) { //x = WAH_I(X, WAH_SIZE, i); x = 0; get_wah_i(X, &x, WAH_SIZE, i); x_i_size = WAH_NUM_WORDS(x, WAH_SIZE); if (x_i_size == 1) R_len += __builtin_popcount(x); } return R_len; }
int main(int argc, char **argv) { uint32_t num_chrms = 100; if ((argc != 4)) { errx(1, "usage:\t%s <index dir> <region> <w|i>", argv[0]); } char *index_dir = argv[1]; char *region_s = argv[2]; char *i_type = argv[3]; struct giggle_index *gi; gi = giggle_load(index_dir, uint32_t_ll_giggle_set_data_handler); #if 0 char *chrm = region_s; uint32_t start = 0, end = 0; uint32_t i, len = strlen(region_s); for (i = 0; i < len; ++i) { if (region_s[i] == ':') { region_s[i] = '\0'; start = atoi(region_s + i + 1); } else if (region_s[i] == '-') { region_s[i] = '\0'; end = atoi(region_s + i + 1); break; } } struct giggle_index *gi; if (i_type[0] == 'i') { gi = giggle_load(index_dir, uint32_t_ll_giggle_set_data_handler); struct uint32_t_ll *R = (struct uint32_t_ll *)giggle_query_region(gi, chrm, start, end); if (R != NULL) printf("Hits:%u\n", R->len); else printf("Hits:0\n"); } else { gi = giggle_load(index_dir, wah_giggle_set_data_handler); uint32_t chr_id = giggle_get_chrm_id(gi, chrm); //return giggle_search(chr_id, gi->root_ids[chr_id], start, end); uint32_t domain = chr_id; uint32_t root_id = gi->root_ids[chr_id]; uint32_t leaf_start_id; int pos_start_id; uint32_t nld_start_id = bpt_find(domain, root_id, &leaf_start_id, &pos_start_id, start); fprintf(stderr, "nld_start_id:%u\t" "leaf_start_id:%u\t" "pos_start_id:%u\n", nld_start_id, leaf_start_id, pos_start_id); struct bpt_node *leaf_start = cache.get(domain, leaf_start_id - 1, &bpt_node_cache_handler); bpt_print_node(leaf_start); struct wah_bpt_non_leading_data *nld = cache.get(domain, BPT_POINTERS(leaf_start)[0] - 1, &wah_non_leading_cache_handler); fprintf(stderr, "WAH_LEN:%u\t" "wah_get_ints_count:%u\t" "\n", WAH_LEN(nld->SA), wah_get_ints_count(nld->SA)); uint32_t *R = NULL; uint32_t R_len = wah_get_ints(nld->SA, &R); uint32_t i; for (i = 0; i < R_len; ++i) { fprintf(stderr, "%u:%u\n", i, R[i]); } /* uint8_t *R = (uint8_t *)giggle_query_region(gi, chrm, start, end); if (R != NULL) printf("Hits:%u\n", wah_get_ints_count(R)); else printf("Hits:0\n"); */ } #endif giggle_index_destroy(&gi); cache.destroy(); }
//{{{ uint32_t wah_or(uint8_t *X, uint8_t *Y, uint8_t **R, uint32_t *R_size) uint32_t wah_or(uint8_t *X, uint8_t *Y, uint8_t **R, uint32_t *R_size) { uint32_t R_i = 0, X_i = 0, Y_i = 0; uint32_t x, y; //uint8_t x, y; uint32_t x_size, y_size, r_size, y_done = 0, x_done = 0; uint32_t X_len = WAH_LEN(X), Y_len = WAH_LEN(Y); uint32_t R_len = X_len + Y_len; uint32_t reset_R = 0; if (*R == NULL) { //fprintf(stderr, "reset_R A\n"); *R_size = sizeof(uint32_t) + (R_len*(WAH_SIZE/BYTE)*sizeof(uint8_t)); *R = (uint8_t *)malloc(*R_size); memset(*R, 0, *R_size); reset_R = 1; } else if (*R_size < sizeof(uint32_t) + (R_len*(WAH_SIZE/BYTE)*sizeof(uint8_t))) { /* fprintf(stderr, "reset_R B\tR_size:%u\t%lu\n", *R_size, sizeof(uint32_t) + (R_len*(WAH_SIZE/BYTE)*sizeof(uint8_t))); */ free(*R); *R_size = sizeof(uint32_t) + (R_len*(WAH_SIZE/BYTE)*sizeof(uint8_t)); *R = (uint8_t *)malloc(*R_size); memset(*R, 0, *R_size); reset_R = 1; } get_wah_i(X, &x, WAH_SIZE, X_i); get_wah_i(Y, &y, WAH_SIZE, Y_i); x_size = WAH_NUM_WORDS(x, WAH_SIZE); y_size = WAH_NUM_WORDS(y, WAH_SIZE); uint32_t v; while (1) { r_size = MIN(x_size, y_size); if (r_size > 1) { v = ((1<< (WAH_SIZE - 1)) + r_size); } else { v = WAH_VAL(x, WAH_SIZE) | WAH_VAL(y, WAH_SIZE); } // Grow R if we need to if (sizeof(uint32_t) + R_i*(WAH_SIZE/BYTE)*sizeof(uint8_t) == *R_size) { uint32_t old_len = R_len; reset_R = 1; R_len = R_len * 2; *R_size = sizeof(uint32_t) + (R_len*(WAH_SIZE/BYTE)*sizeof(uint8_t)); *R = (uint8_t *) realloc(*R, *R_size); memset(*R + sizeof(uint32_t) + (old_len*(WAH_SIZE/BYTE)*sizeof(uint8_t)), 0, old_len*(WAH_SIZE/BYTE)*sizeof(uint8_t) ); } //WAH_I(*R, WAH_SIZE, R_i) = (uint8_t) v; set_wah_i(*R, &v, WAH_SIZE, R_i); R_i += 1; x_size -= r_size; y_size -= r_size; if ((x_size == 0) && (x_done == 0)) { X_i += 1; if (X_i == X_len) { x_done = 1; x = 0; } else { //x = WAH_I(X, 8, X_i); x = 0; get_wah_i(X, &x, WAH_SIZE, X_i); x_size = WAH_NUM_WORDS(x, WAH_SIZE); } } if ((y_size == 0) && (y_done == 0)) { Y_i += 1; if (Y_i == Y_len) { y_done = 1; y = 0; } else { //y = WAH_I(Y, WAH_SIZE, Y_i); y = 0; get_wah_i(Y, &y, WAH_SIZE, Y_i); y_size = WAH_NUM_WORDS(y, WAH_SIZE); } } if ((x_done == 1) && (y_done == 1)) break; else if (x_done == 1) x_size = y_size; else if (y_done == 1) y_size = x_size; } R_len = R_i; WAH_LEN(*R) = R_len; if (reset_R == 1) { *R_size = sizeof(uint32_t) + (R_len*(WAH_SIZE/BYTE)*sizeof(uint8_t)); *R = (uint8_t *)realloc(*R, *R_size); } return reset_R; }
//{{{uint8_t *uints_to_wah(uint32_t *D, uint32_t D_num) uint8_t *uints_to_wah(uint32_t *D, uint32_t D_num) { uint32_t bits_per_word = WAH_SIZE - 1; uint32_t curr_word = 0, // num of words previously considered curr_val = 0, // value at the current index word_i = 0, // index into the array of words dist, fill_size, first_val, last = 0; uint32_t val, i; uint32_t w_num = D_num*2; uint8_t *w = (uint8_t *)malloc(sizeof(uint32_t) + (w_num * (WAH_SIZE/BYTE) * sizeof(uint8_t))); // loop over the sorted input for (i = 0 ; i < D_num; ++i) { // get the distance from the current value and the first value in the // current word val = D[i] - (curr_word * bits_per_word); // will the val fit in the current word? if (val <= bits_per_word) { curr_val |= 1 << ( bits_per_word - val); } else { if (curr_val > 0) { set_wah_i(w, &curr_val, WAH_SIZE, word_i); //fprintf(stderr,"curr_val:%u\tword_i:%u\n", curr_val, word_i); curr_word += 1; // move to the next word word_i += 1; curr_val = 0; if (word_i > w_num) { w_num *= 2; w = (uint8_t *)realloc(w, sizeof(uint32_t) + (w_num * (WAH_SIZE/BYTE) * sizeof(uint8_t))); } val = D[i] - (curr_word * bits_per_word); } uint32_t saved_words; while (val > bits_per_word) { fill_size = ((val + bits_per_word - 1) / bits_per_word) - 1; saved_words = MIN(fill_size, WAH_MAX_FILL_WORDS); curr_val = (1 << (bits_per_word)) + (saved_words); set_wah_i(w, &curr_val, WAH_SIZE, word_i); //fprintf(stderr,"curr_val:%u\tword_i:%u\n", curr_val, word_i); curr_word += saved_words; // move to the next word word_i += 1; curr_val = 0; if (word_i > w_num) { w_num *= 2; w = (uint8_t *)realloc(w, sizeof(uint32_t) + (w_num * (WAH_SIZE/BYTE) * sizeof(uint8_t))); } val -= saved_words * bits_per_word; } if (val > 0) { curr_val = 1 << ( bits_per_word - val); } else { curr_val = 0; } } } if (curr_val > 0) { set_wah_i(w, &curr_val, WAH_SIZE, word_i); //fprintf(stderr,"curr_val:%u\tword_i:%u\n", curr_val, word_i); } w = (uint8_t *)realloc(w, sizeof(uint32_t) + ((word_i + 1) * (WAH_SIZE/BYTE) * sizeof(uint8_t))); WAH_LEN(w) = word_i + 1; //fprintf(stderr, "WAH_LEN:%u\n", WAH_LEN(w)); return w; }