/*
 * Creates a new counting bloom filter from a given scaling bloom filter,
 * with count and id.
 *
 * The new filter is appended to bloom->blooms, the shared bitmap is grown by
 * the new filter's num_bytes, and the header pointers of the scaling bloom
 * and of every counting bloom are re-derived from the (possibly relocated)
 * bitmap array.
 *
 * The error rate of the new filter is bloom->error_rate scaled by
 * ERROR_TIGHTENING_RATIO raised to (num_blooms + 1).
 *
 * Returns the new counting bloom, or NULL when the array of filters cannot
 * be grown, the new filter cannot be initialised, or the bitmap cannot be
 * resized.
 */
counting_bloom_t *new_counting_bloom_from_scale(scaling_bloom_t *bloom)
{
    int i;
    long offset;
    double error_rate;
    counting_bloom_t *cur_bloom;
    counting_bloom_t **blooms;

    error_rate = bloom->error_rate * (pow(ERROR_TIGHTENING_RATIO, bloom->num_blooms + 1));

    /*
     * Grow through a temporary: assigning realloc()'s result straight to
     * bloom->blooms would lose (and leak) the existing array on failure.
     */
    blooms = realloc(bloom->blooms, (bloom->num_blooms + 1) * sizeof *blooms);
    if (blooms == NULL) {
        fprintf(stderr, "Error, could not realloc a new bloom filter\n");
        return NULL;
    }
    bloom->blooms = blooms;

    cur_bloom = counting_bloom_init(bloom->capacity, error_rate, bloom->num_bytes);
    if (cur_bloom == NULL) {
        /* The array has one spare slot now, but num_blooms is unchanged. */
        fprintf(stderr, "Error, could not initialize a new bloom filter\n");
        return NULL;
    }
    bloom->blooms[bloom->num_blooms] = cur_bloom;

    bloom->bitmap = bitmap_resize(bloom->bitmap, bloom->num_bytes, bloom->num_bytes + cur_bloom->num_bytes);
    if (bloom->bitmap == NULL) {
        /*
         * NOTE(review): cur_bloom stays in the uncounted slot and is not
         * released here; the matching cleanup routine and what
         * bitmap_resize() does with the old bitmap on failure are not
         * visible from this function - confirm and release as appropriate.
         */
        fprintf(stderr, "Error, could not resize the bitmap for a new bloom filter\n");
        return NULL;
    }

    /* reset header pointer, as mmap may have moved */
    bloom->header = (scaling_bloom_header_t *) bloom->bitmap->array;

    /* Set the pointers for these header structs to the right location since mmap may have moved */
    bloom->num_blooms++;
    for (i = 0; i < bloom->num_blooms; i++) {
        /* each filter's header sits immediately before its stored offset */
        offset = bloom->blooms[i]->offset - sizeof(counting_bloom_header_t);
        bloom->blooms[i]->header = (counting_bloom_header_t *) (bloom->bitmap->array + offset);
    }

    bloom->num_bytes += cur_bloom->num_bytes;
    cur_bloom->bitmap = bloom->bitmap;

    return cur_bloom;
}
/*
 * Builds a counting bloom filter backed by a freshly created bitmap.
 *
 * The filter is initialised for the requested capacity and error rate at
 * offset 0, a bitmap of the filter's num_bytes is created for it, and the
 * filter header is pointed at the start of that bitmap's array.
 *
 * NOTE(review): neither counting_bloom_init() nor new_bitmap() is checked
 * for failure before its result is dereferenced - confirm whether either
 * can return NULL.
 */
counting_bloom_t *new_counting_bloom(unsigned int capacity, double error_rate)
{
    counting_bloom_t *filter = counting_bloom_init(capacity, error_rate, 0);

    filter->bitmap = new_bitmap(filter->num_bytes);
    filter->header = (counting_bloom_header_t *) filter->bitmap->array;

    return filter;
}
counting_bloom_t *new_counting_bloom(unsigned int capacity, double error_rate, const char *filename) { counting_bloom_t *cur_bloom; int fd; if ((fd = open(filename, O_RDWR | O_CREAT | O_TRUNC, (mode_t)0600)) < 0) { perror("Error, Opening File Failed"); fprintf(stderr, " %s \n", filename); return NULL; } cur_bloom = counting_bloom_init(capacity, error_rate, 0); cur_bloom->bitmap = new_bitmap(fd, cur_bloom->num_bytes); cur_bloom->header = (counting_bloom_header_t *)(cur_bloom->bitmap->array); return cur_bloom; }
/*
 * Builds a counting bloom filter on top of an existing file.
 *
 * The file is opened read/write without being created or truncated. The
 * filter is initialised at offset 0 and a bitmap of the filter's num_bytes,
 * created on the descriptor, is stored in the filter's parent_bitmap field.
 * The header pointer is not set here.
 *
 * Returns the new filter, or NULL when the file cannot be opened.
 *
 * NOTE(review): the results of counting_bloom_init() and new_bitmap() are
 * not checked for failure - confirm their failure contract.
 */
counting_bloom_t *counting_bloom_from_file(unsigned capacity, double error_rate, const char *filename)
{
    counting_bloom_t *filter;
    int file_desc = open(filename, O_RDWR, (mode_t)0600);

    if (file_desc < 0) {
        perror("Error, Opening File Failed");
        fprintf(stderr, " %s \n", filename);
        return NULL;
    }

    filter = counting_bloom_init(capacity, error_rate, 0);
    filter->parent_bitmap = new_bitmap(file_desc, filter->num_bytes);

    return filter;
}
int main(int argc, char **argv) { printf("Testing Counting Bloom version %s\n", counting_bloom_get_version()); CountingBloom cb; counting_bloom_init(&cb, 10, 0.01); counting_bloom_add_string(&cb, "test"); counting_bloom_add_string(&cb, "out"); counting_bloom_add_string(&cb, "the"); counting_bloom_add_string(&cb, "counting"); counting_bloom_add_string(&cb, "bloom"); counting_bloom_add_string(&cb, "filter"); // we can add it multiple times! counting_bloom_add_string(&cb, "test"); counting_bloom_add_string(&cb, "Test"); // should not be added to the 'test' strings counting_bloom_add_string(&cb, "out"); counting_bloom_add_string(&cb, "test"); if (counting_bloom_check_string(&cb, "test") == COUNTING_BLOOM_SUCCESS) { printf("'test' was found in the counting bloom with false positive rate of %f!\n", counting_bloom_current_false_positive_rate(&cb)); printf("'test' is in the counting bloom a maximum of %d times!\n", counting_bloom_get_max_insertions(&cb, "test")); } else { printf("'test' was not found in the counting bloom!\n"); } printf("Export the Counting Bloom!\n"); counting_bloom_export(&cb, "./dist/test.cbm"); counting_bloom_stats(&cb); counting_bloom_destroy(&cb); printf("Exported and destroyed the original counting bloom!\n\n"); printf("Re-import the bloom filter!\n"); CountingBloom cb1; counting_bloom_import(&cb1, "./dist/test.cbm"); if (counting_bloom_check_string(&cb1, "test") == COUNTING_BLOOM_SUCCESS) { printf("'test' was found in the counting bloom with false positive rate of %f!\n", counting_bloom_current_false_positive_rate(&cb1)); printf("'test' is in the counting bloom a maximum of %d times!\n", counting_bloom_get_max_insertions(&cb1, "test")); } else { printf("'test' was not found in the counting bloom!\n"); } if (counting_bloom_check_string(&cb1, "blah") == COUNTING_BLOOM_SUCCESS) { printf("'blah' was found in the counting bloom with false positive rate of %f!\n", counting_bloom_current_false_positive_rate(&cb1)); printf("'blah' is in the counting bloom 
a maximum of %d times!\n", counting_bloom_get_max_insertions(&cb1, "test")); } else { printf("'blah' was not found in the counting bloom!\n"); } counting_bloom_stats(&cb1); counting_bloom_destroy(&cb1); }
counting_bloom_t *new_counting_bloom_from_file(unsigned int capacity, double error_rate, const char *filename) { int fd; off_t size; counting_bloom_t *bloom; if ((fd = open(filename, O_RDWR, (mode_t)0600)) < 0) { fprintf(stderr, "Error, Could not open file %s: %s\n", filename, strerror(errno)); return NULL; } if ((size = lseek(fd, 0, SEEK_END)) < 0) { perror("Error, calling lseek() to tell file size"); close(fd); return NULL; } if (size == 0) { fprintf(stderr, "Error, File size zero\n"); } bloom = counting_bloom_init(capacity, error_rate, 0); if (size != bloom->num_bytes) { free_counting_bloom(bloom); fprintf(stderr, "Error, Actual filesize and expected filesize are not equal\n"); return NULL; } if ((bloom->bitmap = new_bitmap(fd, size)) == NULL) { fprintf(stderr, "Error, Could not create bitmap with file\n"); free_counting_bloom(bloom); return NULL; } bloom->header = (counting_bloom_header_t *)(bloom->bitmap->array); return bloom; }