// Compression experiment driver.
//
// Builds a compressible_bloom_filter sized for the loaded word list, inserts
// every word, and then loops while filter.size() > 1:
//   1. checks that every inserted word is still reported as present,
//   2. counts how many entries of the outlier set the filter reports as present,
//   3. prints one row: filter.size(), filter.effective_fpp(), the measured
//      rate over the outliers, and 100 * (measured / effective),
//   4. calls filter.compress(5.0) and stops if it returns false.
//
// Returns 0 when the loop finishes, 1 when load_word_list fails or when an
// inserted word is not found in the filter.
int main(int argc, char* argv[])
{
   std::vector<std::string> word_list;
   std::deque<std::string>  outliers;

   if (!load_word_list(argc, argv, word_list))
      return 1;

   // NOTE(review): presumably fills 'outliers' with keys that are not in
   // word_list - confirm against generate_outliers.
   generate_outliers(word_list, outliers);

   unsigned int seed = 0xA57EC3B2;

   // Target false-positive probability: one in word_list.size().
   const double target_fpp = 1.0 / word_list.size();

   compressible_bloom_filter filter(word_list.size(), target_fpp, seed);

   filter.insert(word_list.begin(), word_list.end());

   std::cout << "Filter Size\tEFPP \tOFPP \tDiff" << std::endl;

   while (filter.size() > 1)
   {
      // contains_all yields end() when every key in the range was found;
      // anything else points at the first key reported as missing.
      std::vector<std::string>::iterator missing =
         filter.contains_all(word_list.begin(), word_list.end());

      if (missing != word_list.end())
      {
         std::cout << "ERROR: key not found in bloom filter! =>" << (*missing) << std::endl;
         return 1;
      }

      // Every outlier the filter claims to contain is counted as a false positive.
      std::size_t false_hits = 0;

      for (std::size_t i = 0; i < outliers.size(); ++i)
      {
         if (filter.contains(outliers[i]))
            ++false_hits;
      }

      const double pfp = static_cast<double>(false_hits) /
                         static_cast<double>(outliers.size());

      printf("%11llu\t%8.7f\t%8.7f\t%8.6f\n",
             static_cast<unsigned long long>(filter.size()),
             filter.effective_fpp(),
             pfp,
             100.0 * (pfp / filter.effective_fpp()));

      if (filter.compress(5.0))
         continue;

      std::cout << "Filter cannot be compressed any further." << std::endl;
      break;
   }

   return 0;
}
int main(int argc, char* argv[]) { std::vector<std::string> word_list; std::deque<std::string> outliers; if (!load_word_list(argc,argv,word_list)) { return 1; } generate_outliers(word_list,outliers); unsigned int random_seed = 0; std::size_t word_list_storage_size = 0; for(unsigned int i = 0; i < word_list.size(); ++i) { word_list_storage_size += word_list[i].size(); } std::size_t total_number_of_queries = 0; const double desired_probability_of_false_positive = 1.0 / word_list.size(); printf("Round\tQueries \tFPQ \tIPFP \tPFP \tDPFP \tTvD \n"); while(random_seed < 1000) { bloom_filter filter(word_list.size(),desired_probability_of_false_positive,random_seed++); filter.insert(word_list.begin(),word_list.end()); std::vector<std::string>::iterator it = filter.contains_all(word_list.begin(),word_list.end()); if (word_list.end() != it) { std::cout << "ERROR: key not found! =>" << (*it) << std::endl; return 1; } std::size_t total_false_positive = 0; for(std::deque<std::string>::iterator it = outliers.begin(); it != outliers.end(); ++it) { if (filter.contains(*it)) { ++total_false_positive; } } total_number_of_queries += (outliers.size() + word_list.size()); // Overall false positive probability double pfp = total_false_positive / (1.0 * outliers.size()); printf("%10llu\t%10llu\t%6llu\t%8.7f\t%8.7f\t%8.6f\t%8.6f\n", static_cast<unsigned long long>(random_seed), static_cast<unsigned long long>(total_number_of_queries), static_cast<unsigned long long>(total_false_positive), desired_probability_of_false_positive, pfp, (100.0 * pfp) / desired_probability_of_false_positive, (100.0 * filter.size()) / (bits_per_char * word_list_storage_size)); } return 0; }