END_TEST

/*
 * Hashes two string literals that are identical up to an embedded NUL byte
 * and differ only afterwards, and asserts that both produce the same
 * hash values in every slot.
 */
START_TEST(test_hashes_key_length)
{
    enum { HASH_COUNT = 10 };

    char *first_key = "cat\0A123456890";
    char *second_key = "cat\0ABCDEFGHI";

    bloom_filter_header bh;
    bloom_bloomfilter filter;
    bh.k_num = HASH_COUNT;
    filter.header = &bh;

    uint64_t hashes[HASH_COUNT];
    uint64_t hashes_cpy[HASH_COUNT];

    // Hash the first key and snapshot the result
    bf_compute_hashes(filter, first_key, hashes);
    int i = 0;
    while (i < HASH_COUNT) {
        hashes_cpy[i] = hashes[i];
        i++;
    }

    // Hash the second variant into the same output buffer
    bf_compute_hashes(filter, second_key, hashes);

    // Each slot has to match the snapshot taken above
    for (i = 0; i < HASH_COUNT; i++) {
        fail_unless(hashes_cpy[i] == hashes[i]);
    }
}
END_TEST

/*
 * Hashes a key, hashes an unrelated key into the same output buffer, then
 * hashes the first key again and asserts the result equals the first run.
 */
START_TEST(test_hashes_consistent)
{
    enum { HASH_COUNT = 10 };

    char *primary_key = "cat";
    char *other_key = "abcdefghijklmnopqrstuvwxyz";

    bloom_filter_header bh;
    bloom_bloomfilter filter;
    bh.k_num = HASH_COUNT;
    filter.header = &bh;

    uint64_t hashes[HASH_COUNT];
    uint64_t hashes_cpy[HASH_COUNT];

    // First pass over the primary key; keep a snapshot
    bf_compute_hashes(filter, primary_key, hashes);
    int i = 0;
    while (i < HASH_COUNT) {
        hashes_cpy[i] = hashes[i];
        i++;
    }

    // Overwrite the buffer with another key's hashes, then redo the primary key
    bf_compute_hashes(filter, other_key, hashes);
    bf_compute_hashes(filter, primary_key, hashes);

    // The second pass must reproduce the snapshot exactly
    for (i = 0; i < HASH_COUNT; i++) {
        fail_unless(hashes_cpy[i] == hashes[i]);
    }
}
/**
 * Inserts a key into the bloom filter unless the filter already
 * reports it as present.
 * @arg filter The filter to insert into
 * @arg key The key to insert
 * @returns 1 when the key's bits were set and the header count was
 * incremented, 0 when bf_internal_contains reported the key as present.
 * No path in this function itself produces a negative value.
 */
int bf_add(bloom_bloomfilter *filter, char* key)
{
    bloom_filter_header *header = filter->header;

    // Stack scratch space holding k_num hash values for this key
    uint64_t *hashes = alloca(header->k_num * sizeof(uint64_t));
    bf_compute_hashes(header->k_num, key, hashes);

    // A result of exactly 1 means "already present": leave the filter untouched
    if (bf_internal_contains(filter, hashes) == 1) {
        return 0;
    }

    // Bit indexes begin 8*sizeof(header) bits in; hash number idx is mapped
    // into the idx-th run of partition_bits bits after that point.
    const uint64_t partition_bits = filter->offset;
    const uint64_t first_bit = 8 * sizeof(bloom_filter_header);

    for (uint32_t idx = 0; idx < header->k_num; idx++) {
        uint64_t partition_start = first_bit + idx * partition_bits;
        uint64_t bit = partition_start + (hashes[idx] % partition_bits);
        bitmap_setbit(filter->map, bit);
    }

    header->count++;
    return 1;
}
END_TEST

/*
 * Writes two different keys alternately into one reused character buffer,
 * XOR-folds the hashes computed after each write, and asserts that the
 * fold for a given key is the same both times it is hashed.
 */
START_TEST(test_hashes_same_buffer)
{
    enum { HASH_COUNT = 10, BUF_LEN = 100 };

    bloom_filter_header bh;
    bloom_bloomfilter filter;
    bh.k_num = HASH_COUNT;
    filter.header = &bh;

    uint64_t hashes[HASH_COUNT];
    char buf[BUF_LEN];
    uint64_t hash0 = 0;
    uint64_t hash1 = 0;
    uint64_t hash2 = 0;
    uint64_t hash3 = 0;
    int i;

    // Round 0: first key
    snprintf(buf, BUF_LEN, "test0");
    bf_compute_hashes(filter, buf, hashes);
    for (i = 0; i < HASH_COUNT; i++) {
        hash0 ^= hashes[i];
    }

    // Round 1: second key, same buffer
    snprintf(buf, BUF_LEN, "ABCDEFGHI");
    bf_compute_hashes(filter, buf, hashes);
    for (i = 0; i < HASH_COUNT; i++) {
        hash1 ^= hashes[i];
    }

    // Round 2: first key again
    snprintf(buf, BUF_LEN, "test0");
    bf_compute_hashes(filter, buf, hashes);
    for (i = 0; i < HASH_COUNT; i++) {
        hash2 ^= hashes[i];
    }

    // Round 3: second key again
    snprintf(buf, BUF_LEN, "ABCDEFGHI");
    bf_compute_hashes(filter, buf, hashes);
    for (i = 0; i < HASH_COUNT; i++) {
        hash3 ^= hashes[i];
    }

    // Matching keys must fold to matching values
    fail_unless(hash0 == hash2);
    fail_unless(hash1 == hash3);
}
/**
 * Looks a key up in the filter.
 * @arg filter The filter to query
 * @arg key The key to look for
 * @returns The result of bf_internal_contains for the key's hashes,
 * unchanged (documented by the original as 1 if present, 0 if not
 * present, negative on error).
 */
int bf_contains(bloom_bloomfilter *filter, char* key)
{
    // Stack scratch space for k_num hash values; it goes away on return
    uint64_t *key_hashes = alloca(filter->header->k_num * sizeof(uint64_t));

    // Fill the scratch space with this key's hashes
    bf_compute_hashes(filter->header->k_num, key, key_hashes);

    // Delegate the actual lookup to the shared internal routine
    int found = bf_internal_contains(filter, key_hashes);
    return found;
}
END_TEST

/*
 * Computes 1000 hashes for a single key and asserts that no two of them
 * are equal.
 */
START_TEST(test_hashes_basic)
{
    enum { HASH_COUNT = 1000 };

    char *phrase = "the quick brown fox";

    bloom_filter_header bh;
    bloom_bloomfilter filter;
    bh.k_num = HASH_COUNT;
    filter.header = &bh;

    uint64_t hashes[HASH_COUNT];
    bf_compute_hashes(filter, phrase, hashes);

    // Compare every unordered pair once. The pairwise scan is quadratic,
    // which is tolerable for a thousand entries.
    int i = 0;
    while (i < HASH_COUNT) {
        int j = i + 1;
        while (j < HASH_COUNT) {
            fail_unless(hashes[i] != hashes[j]);
            j++;
        }
        i++;
    }
}