int main() {
	
	BloomFilter *s = new BloomFilter(4);
	char c;
	//cout<<"BLABLA";
	Key k;
	do {
		cout<<"a - add\nf - find\nd - del\nq - quit\n";
		cin>>c;
		if (!(c!='a' || c!='f' || c!='d' || c!='q'))
			continue;
		cout<<"Value:\n";
		cin>>k;
		if(c=='a') {
			
			s->add(k);
		} else if(c == 'f') {
			
			if((s->testExist(k, false)))
				cout<<"Found!\n";
			else
				cout<<"Nod found!\n";
			
		} else if(c == 'd') {
			s->del(k);
		} else {
			break;
		}
			}while(c!='q');
	cout<<"That's all, folks!\n";
	return 0;
}
Esempio n. 2
0
// Smoke test for the chunkers and the Bloom filter.
//
// Opens the file named by argv[1], splits it with the TTTDs chunker, passes
// every resulting chunk to the FBC chunker and prints the FBC frequency
// table. Afterwards a BloomFilter is queried before and after one insert.
// Exits with EXIT_FAILURE when no input file is given or it cannot be opened.
int main(int argc, char *argv[]) {

  // argv[1] is used below; without this check a missing argument would hand
  // a null pointer to filebuf::open.
  if (argc < 2) {
    cout << "Missing input file argument" << endl;
    exit(EXIT_FAILURE);
  }

  // Test TTTDs
  std::filebuf fb;
  if (fb.open(argv[1], std::ios::in))
  {
    std::istream input(&fb);
    // The following TTTDs parameters are from TTTDs paper, in practice we should set the chunk size larger
    TTTDsChunker chunker(460, 2800, 540, 270, 1, 1600);
    // NOTE(review): createChunks returns a raw pointer to a vector of raw
    // Chunk pointers; nothing here frees them. Confirm who owns this memory.
    vector<Chunk *> *chunks = chunker.createChunks(input);


    // Test FBC
    FBCChunker fChunker(2000, 500, 32);

    for (vector<Chunk *>::iterator c = chunks->begin(); c != chunks->end(); ++c) {
      fChunker.splitBigChunk(**c);
    }

    fChunker.printFreqTable();

    fb.close();
  } else {
    cout << "Can not open input stream" << endl;
    exit(EXIT_FAILURE);
  }

  // Test bloom filter
  // The length 6 covers the five characters of "hello" plus its terminating
  // NUL; insert and lookup use the same length so they agree with each other.
  BloomFilter bf;  
  cout << "Before insert: " << bf.lookup("hello", 6) << endl;
  bf.insert("hello", 6);
  cout << "After insert: " << bf.lookup("hello", 6) << endl;  

  return 0;
}
Esempio n. 3
0
// Inserts INSERT_NUM generated URLs into a Bloom filter sized for a 0.001
// false-positive rate and reports how many were inserted before the filter
// first claimed to already contain a URL that had not been added yet.
int main(int argc, char* argv[])
{
	const size_t size = BloomFilter<std::string>::GetBufferSize(INSERT_NUM, 0.001);
	const size_t k = BloomFilter<std::string>::GetK(INSERT_NUM, 0.001);
	BloomFilter<std::string> bf = BloomFilter<std::string>::CreateBloomFilter(size, k);

	// Number of URLs added before the first false positive (or all of them).
	uint32_t success = 0;
	for(uint32_t i=0; i<INSERT_NUM; ++i)
	{
		std::string strUrl = (boost::format("http://voanews.com/article/%u") % i).str();
		// Every URL is unique, so a hit here is a false positive: stop.
		if(bf.Contains(strUrl))
			break;

		++success;
		//printf("crawl url[%s]...\n", strUrl.c_str());
		bf.Add(strUrl);
	}

	// %zu is the conversion that matches size_t; %lu is only correct on
	// platforms where size_t happens to be unsigned long.
	printf("success count: %u[size:%zu, k:%zu]\n", static_cast<unsigned>(success), size, k);

	// dump bloomfilter bitmap buffer
	// bf.Dump();

	bf.Delete();
	return 0;
}
Esempio n. 4
0
// Inserts every window of opt::kmerLen characters of `seq` into `myFilter`.
//
// The first window is inserted from the raw characters; that call also fills
// the two 64-bit values, which every later call receives together with the
// character leaving the window and the one entering it.
// (Presumably these are rolling forward/reverse hash states - confirm against
// the BloomFilter::insert overloads.)
void loadSeqr(BloomFilter & myFilter, const string & seq) {
    // A sequence shorter than one k-mer has nothing to insert.
    if (seq.size() < opt::kmerLen) {
        return;
    }

    uint64_t fwdVal, revVal;
    myFilter.insert(seq.c_str(), fwdVal, revVal);

    size_t pos = 1;
    while (pos < seq.size() - opt::kmerLen + 1) {
        const char leaving = seq[pos - 1];
        const char entering = seq[pos + opt::kmerLen - 1];
        myFilter.insert(fwdVal, revVal, leaving, entering);
        ++pos;
    }
}
Esempio n. 5
0
// Runs BloomFilter::bfCheck over two input files and appends the elapsed CPU
// time to the result file.
//
// Arguments: argv[1] = string pool file, argv[2] = file of strings to check,
// argv[3] = result file. Returns 1 if an argument is missing or any of the
// three files cannot be opened.
int main(int argc, char *argv[]) {
	// All three paths are required; without this check a missing argument
	// would pass a null pointer to the stream constructors.
	if (argc < 4)
		return 1;

	ifstream fpStrPool(argv[1]);
	ifstream fpCheckedStr(argv[2]);
	ofstream fpResult(argv[3]);
	// Do not run the check on streams that failed to open.
	if (!fpStrPool || !fpCheckedStr || !fpResult)
		return 1;

	clock_t begin = clock();
	BloomFilter b;
	b.bfCheck(fpStrPool, fpCheckedStr, fpResult);
	clock_t end = clock();
	// clock() ticks converted to seconds.
	fpResult << (double)(end - begin) / CLOCKS_PER_SEC;
	fpResult << "s" << endl;
	fpResult << "------------------bfend---------------------- ";
	return 0;
}
Esempio n. 6
0
// Minimal demo: adds two words to a BloomFilter and prints, one per line,
// whether the filter reports each of three probe words as present.
int main()
{
    BloomFilter bf;

    bf.add("hoge");
    bf.add("fuga");

    // The first two probes were added above; the third was not.
    const char* const probes[] = { "hoge", "fuga", "kuso" };
    for (const char* probe : probes) {
        if (bf.exist(probe)) {
            cout << "true" << endl;
        } else {
            cout << "false" << endl;
        }
    }

    return 0;
}
Esempio n. 7
0
// Runs one collection: init, trace and sweep on a fresh Marker.
//
// When t_eager_gc is set and RuntimeOption::EvalFilterGCPoints is enabled,
// the flag is cleared and the current vmpc() is looked up in
// t_surprise_filter: a pc that is already recorded returns without
// collecting, otherwise it is recorded and collection proceeds.
// `phase` is only used in the trace message.
void collectImpl(const char* phase) {
    VMRegAnchor _;
    const bool filterThisPoint = t_eager_gc && RuntimeOption::EvalFilterGCPoints;
    if (!filterThisPoint) {
        TRACE(2, "normal gc %s at %p\n", phase, vmpc());
    } else {
        t_eager_gc = false;
        auto pc = vmpc();
        // This pc has been recorded before: skip the collection.
        if (t_surprise_filter.test(pc)) {
            return;
        }
        t_surprise_filter.insert(pc);
        TRACE(2, "eager gc %s at %p\n", phase, pc);
    }

    Marker mkr;
    mkr.init();
    mkr.trace();
    mkr.sweep();
}
Esempio n. 8
0
int main(){
  BloomFilter *bf = new BloomFilter(1024);

  string st1 = "this";
  string st2 = "is";
  string st3 = "a";
  string st4 = "test";

  bf->add(st1.c_str(),st1.length());
  bf->add(st2.c_str(),st2.length());
  cout<<"bloom filter contains \""<<st1 <<"\"" <<":"<< bf->contains(st1.c_str(), st1.length()) << endl;
  cout<<"bloom filter contains \""<<st2 <<"\"" <<":"<< bf->contains(st2.c_str(), st2.length()) << endl;
  cout<<"bloom filter contains \""<<st3 <<"\"" <<":"<< bf->contains(st3.c_str(), st3.length()) << endl;
  cout<<"bloom filter contains \""<<st4 <<"\"" <<":"<< bf->contains(st4.c_str(), st4.length()) << endl;


}
Esempio n. 9
0
// Inserts every window of opt::kmerLen characters of `seq` into `myFilter`
// through insertXxh, after passing each window through getCanon.
// (getCanon presumably rewrites the k-mer into a canonical form in place -
// confirm against its definition.)
void loadSeqx(BloomFilter & myFilter, const string & seq) {
    // A sequence shorter than one k-mer has nothing to insert.
    if (seq.size() < opt::kmerLen) {
        return;
    }

    size_t start = 0;
    while (start < seq.size() - opt::kmerLen + 1) {
        string window = seq.substr(start, opt::kmerLen);
        getCanon(window);
        myFilter.insertXxh(window.c_str());
        ++start;
    }
}
Esempio n. 10
0
// Handles a filter-related message from `origin`.
//
// Throws OriginNotReady when the peer's version() is still 0.
// For a "filterload" command the payload is deserialized into a BloomFilter;
// the filter is stored on the peer only if it passes
// isWithinSizeConstraints(), while relayTxes is set to true in either case,
// and the handler returns true.
// NOTE(review): the remainder of this function (handling of other commands
// and the final return) is not visible in this excerpt.
bool FilterHandler::operator()(Peer* origin, Message& msg) {
    log_trace("args: origin: %s, msg: %s", origin->endpoint().toString(), msg.command());
    // A version of 0 means the peer is not ready; refuse to process.
    if (origin->version() == 0) {
        throw OriginNotReady();
    }
    if (msg.command() == "filterload") {
        BloomFilter filter;
        
        // Deserialize the filter from the raw message payload.
        istringstream is(msg.payload());
        is >> filter;
        
        // A filter outside the size constraints is dropped; the peer's
        // existing filter is then left untouched.
        if (filter.isWithinSizeConstraints()) {
            origin->filter = filter;
            origin->filter.updateEmptyFull();
        }
        origin->relayTxes = true;
        return true;
    }
Esempio n. 11
0
// Counts how many windows of opt::kmerLen characters of `seq` are reported
// as present by `myFilter`, adding one to `fHit` per hit.
//
// The first window is queried from the raw characters; that call also fills
// the two 64-bit values, which every later query receives together with the
// character leaving the window and the one entering it.
// When built with OpenMP each increment of `fHit` is an atomic update.
void querySeqr(BloomFilter & myFilter, const string & seq, size_t & fHit) {
    // A sequence shorter than one k-mer has nothing to query.
    if (seq.size() < opt::kmerLen) {
        return;
    }

    uint64_t fwdVal, revVal;
    const bool firstHit = myFilter.contains(seq.c_str(), fwdVal, revVal);
    if (firstHit) {
        #ifdef _OPENMP
        #pragma omp atomic
        #endif
        ++fHit;
    }

    size_t pos = 1;
    while (pos < seq.size() - opt::kmerLen + 1) {
        if (myFilter.contains(fwdVal, revVal, seq[pos - 1], seq[pos + opt::kmerLen - 1])) {
            #ifdef _OPENMP
            #pragma omp atomic
            #endif
            ++fHit;
        }
        ++pos;
    }
}
Esempio n. 12
0
// Inserts into `BloomFilterFilter` every value produced by a
// RollingHashIterator built over `seq` with opt::kmerLen and opt::nhash.
// Sequences shorter than opt::kmerLen are ignored.
void loadSeqr(BloomFilter & BloomFilterFilter, const string & seq) {
    if (seq.size() < opt::kmerLen) return;
    // The iterator walks the sequence itself, so no k-mer substring needs to
    // be materialised here.
    for (RollingHashIterator itr(seq, opt::kmerLen, opt::nhash); itr != itr.end(); itr++) {
        BloomFilterFilter.insert(*itr);
    }
}
// Checks that a filter serialized by another implementation can be loaded
// with BloomFilter::from_json and answers membership queries as expected.
TEST(BloomFilterTest, JsonDeserialize3rdParty)
{
  // A bloom filter that conforms to the JSON API but was generated by an
  // independent 3rd party implementation. It contains the strings "Kermit" and
  // "MissPiggy".
  const std::string json =
    "{\"bitmap\":\"J+i5Mg==\",\"total_bits\":32,\"bits_per_entry\":12,"
     "\"hash0\":{\"k0\":6547054200929830170,\"k1\":9813628641652032020},"
     "\"hash1\":{\"k0\":15888472079188754020,\"k1\":14822504794822470401}}";

  BloomFilter* bf = BloomFilter::from_json(json);

  // Fatal assertion: the checks below dereference bf, so the test must stop
  // here if deserialization failed instead of crashing on a null pointer.
  ASSERT_NE(bf, nullptr);
  EXPECT_TRUE(bf->check("Kermit"));
  EXPECT_TRUE(bf->check("MissPiggy"));
  EXPECT_FALSE(bf->check("Gonzo"));
  EXPECT_FALSE(bf->check("Animal"));

  delete bf; bf = nullptr;
}
Esempio n. 14
0
// Counts how many start positions in `seq` are reported as present by
// `myFilter`, adding one to `fHit` per hit.
//
// Each query passes a pointer to the start position inside the sequence's
// character buffer; one query is made per window of opt::kmerLen characters.
// When built with OpenMP each increment of `fHit` is an atomic update.
void querySeq(BloomFilter & myFilter, const string & seq, size_t & fHit) {
    // A sequence shorter than one k-mer has nothing to query.
    if (seq.size() < opt::kmerLen) {
        return;
    }

    size_t start = 0;
    while (start < seq.size() - opt::kmerLen + 1) {
        if (myFilter.contains(seq.c_str() + start)) {
            #ifdef _OPENMP
            #pragma omp atomic
            #endif
            ++fHit;
        }
        ++start;
    }
}
// Checks that BloomFilter::from_json tolerates unknown JSON fields and still
// yields a filter that answers membership queries as expected.
TEST(BloomFilterTest, JsonDeserializeExtraFields)
{
  // A bloom filter that contains some additional fields that our implementation
  // does not recognize. This should be accepted, as it represents a
  // backwards-compatible change to the filter.
  const std::string json =
    "{\"bitmap\":\"J+i5Mg==\",\"total_bits\":32,\"bits_per_entry\":12,"
     "\"hash0\":{\"k0\":6547054200929830170,\"k1\":9813628641652032020},"
     "\"hash1\":{\"k0\":15888472079188754020,\"k1\":14822504794822470401},"
     "\"future\": \"unknown\"}";

  BloomFilter* bf = BloomFilter::from_json(json);

  // Fatal assertion: the checks below dereference bf, so the test must stop
  // here if deserialization failed instead of crashing on a null pointer.
  ASSERT_NE(bf, nullptr);
  EXPECT_TRUE(bf->check("Kermit"));
  EXPECT_TRUE(bf->check("MissPiggy"));
  EXPECT_FALSE(bf->check("Gonzo"));
  EXPECT_FALSE(bf->check("Animal"));

  delete bf; bf = nullptr;
}
Esempio n. 16
0
// Returns the hashes of all transactions obtained from transactions() for
// which filter.isRelevantAndUpdate() returns true, in their original order.
// The filter is passed by non-const reference to isRelevantAndUpdate, which
// (going by its name) may modify it - confirm against BloomFilter.
vector<uint256> Claims::claims(BloomFilter& filter) const {
    // Passed to transactions() by name; its value is not used here.
    int64_t fee;
    const vector<Transaction> txns = transactions(fee);

    vector<uint256> hashes;
    for (size_t idx = 0; idx < txns.size(); ++idx) {
        const Transaction& txn = txns[idx];
        if (!filter.isRelevantAndUpdate(txn)) {
            continue;
        }
        hashes.push_back(txn.getHash());
    }

    return hashes;
}
Esempio n. 17
0
  // Returns _esize() + b._esize() - _eUnion(b), i.e. the intersection value
  // obtained from the two individual sizes and the union by
  // inclusion-exclusion.
  // (Presumably the "_e" helpers return estimated cardinalities - confirm
  // against their definitions.)
  double _eIntersect(BloomFilter &b)
  {
	  // The union is evaluated first, as in the original statement order.
	  const double union_size = _eUnion(b);
	  return _esize() + b._esize() - union_size;
  }
Esempio n. 18
0
// Counts how many windows of opt::kmerLen characters of `seq` are reported
// as present by myFilter.containsXxh, adding one to `fHit` per hit.
// Each window is passed through getCanon before it is queried.
// When built with OpenMP each increment of `fHit` is an atomic update.
void querySeqx(BloomFilter & myFilter, const string & seq, size_t & fHit) {
    // A sequence shorter than one k-mer has nothing to query.
    if (seq.size() < opt::kmerLen) {
        return;
    }

    size_t start = 0;
    while (start < seq.size() - opt::kmerLen + 1) {
        string window = seq.substr(start, opt::kmerLen);
        getCanon(window);
        if (myFilter.containsXxh(window.c_str())) {
            #ifdef _OPENMP
            #pragma omp atomic
            #endif
            ++fHit;
        }
        ++start;
    }
}
// Builds a filter with for_num_entries_and_fp_prob(2, 0.0001), adds two
// strings and checks that they are found while two other strings are not.
TEST(BloomFilterTest, NumEntriesAndFalsePositiveRate)
{
  BloomFilter* bf = BloomFilter::for_num_entries_and_fp_prob(2, 0.0001);

  // Fatal assertion: everything below dereferences bf, so stop here if the
  // factory returned no filter.
  ASSERT_NE(bf, nullptr);

  bf->add("Kermit");
  bf->add("MissPiggy");

  EXPECT_TRUE(bf->check("Kermit"));
  EXPECT_TRUE(bf->check("MissPiggy"));
  EXPECT_FALSE(bf->check("Gonzo"));
  EXPECT_FALSE(bf->check("Animal"));

  delete bf; bf = nullptr;
}
// Exercises BloomFilter::get / set with two keys: a key is reported absent
// before it is set and present afterwards, and setting the second key does
// not make the first one disappear.
static void testBloomFilter()
{
	HashParams hashParams(10);
	// Automatic storage: the filter is destroyed on every exit path, including
	// an early exit caused by a failing REQUIRE.
	BloomFilter bf(&hashParams, 1000);

	// Queried twice on purpose, as in the original test.
	REQUIRE(bf.get(key1.data, key1.len) == false);
	REQUIRE(bf.get(key1.data, key1.len) == false);

	bf.set(key1.data, key1.len);
	REQUIRE(bf.get(key1.data, key1.len) == true);
	
	REQUIRE(bf.get(key2.data, key2.len) == false);
	bf.set(key2.data, key2.len);
	REQUIRE(bf.get(key2.data, key2.len) == true);
	REQUIRE(bf.get(key1.data, key1.len) == true);
}
Esempio n. 21
0
int
main()
{
  BloomFilter<12, FilterChecker> *filter = new BloomFilter<12, FilterChecker>();
  MOZ_RELEASE_ASSERT(filter);

  FilterChecker one(1);
  FilterChecker two(0x20000);
  FilterChecker many(0x10000);
  FilterChecker multiple(0x20001);

  filter->add(&one);
  MOZ_RELEASE_ASSERT(filter->mightContain(&one),
             "Filter should contain 'one'");

  MOZ_RELEASE_ASSERT(!filter->mightContain(&multiple),
             "Filter claims to contain 'multiple' when it should not");

  MOZ_RELEASE_ASSERT(filter->mightContain(&many),
             "Filter should contain 'many' (false positive)");

  filter->add(&two);
  MOZ_RELEASE_ASSERT(filter->mightContain(&multiple),
             "Filter should contain 'multiple' (false positive)");

  // Test basic removals
  filter->remove(&two);
  MOZ_RELEASE_ASSERT(!filter->mightContain(&multiple),
             "Filter claims to contain 'multiple' when it should not after two "
             "was removed");

  // Test multiple addition/removal
  const size_t FILTER_SIZE = 255;
  for (size_t i = 0; i < FILTER_SIZE - 1; ++i)
    filter->add(&two);

  MOZ_RELEASE_ASSERT(filter->mightContain(&multiple),
             "Filter should contain 'multiple' after 'two' added lots of times "
             "(false positive)");

  for (size_t i = 0; i < FILTER_SIZE - 1; ++i)
    filter->remove(&two);

  MOZ_RELEASE_ASSERT(!filter->mightContain(&multiple),
             "Filter claims to contain 'multiple' when it should not after two "
             "was removed lots of times");

  // Test overflowing the filter buckets
  for (size_t i = 0; i < FILTER_SIZE + 1; ++i)
    filter->add(&two);

  MOZ_RELEASE_ASSERT(filter->mightContain(&multiple),
             "Filter should contain 'multiple' after 'two' added lots more "
             "times (false positive)");

  for (size_t i = 0; i < FILTER_SIZE + 1; ++i)
    filter->remove(&two);

  MOZ_RELEASE_ASSERT(filter->mightContain(&multiple),
             "Filter claims to not contain 'multiple' even though we should "
             "have run out of space in the buckets (false positive)");
  MOZ_RELEASE_ASSERT(filter->mightContain(&two),
             "Filter claims to not contain 'two' even though we should have "
             "run out of space in the buckets (false positive)");

  filter->remove(&one);

  MOZ_RELEASE_ASSERT(!filter->mightContain(&one),
             "Filter should not contain 'one', because we didn't overflow its "
             "bucket");

  filter->clear();

  MOZ_RELEASE_ASSERT(!filter->mightContain(&multiple),
             "clear() failed to work");

  return 0;
}
Esempio n. 22
0
int main(void)
{
	vector<long long> BloomDataAddTime;
	vector<long long> BloomDataSubTime;
	vector<unsigned long long> BloomDataSize;
	
	std::cout << "ブルームフィルタテスト" << std::endl;
	std::cout << "測定開始" << std::endl;
	for (int count = 1; count <= 100; count++)
	{
		BloomFilter BF;
		auto Start = std::chrono::system_clock::now();
		for (int i = 0; i < 100; i++)
		{
			BF.AddText(to_string(i));
			std::cout << BF.GetFilter() << std::endl;
		}
		auto End = std::chrono::system_clock::now();
		auto Time = End - Start;
		auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(Time).count();
		BloomDataAddTime.push_back(ms);
		BloomDataSize.push_back(sizeof(BF));

		Start = std::chrono::system_clock::now();
		for (int i = 0; i < 100; i++)
		{
			BF.SubText(to_string(i));
			std::cout << BF.GetFilter() << std::endl;
		}
		End = std::chrono::system_clock::now();
		Time = End - Start;
		ms = std::chrono::duration_cast<std::chrono::milliseconds>(Time).count();
		BloomDataSubTime.push_back(ms);
	}
	std::cout << "測定終了" << std::endl;

	std::cout << "データ追加" << std::endl;
	std::sort(BloomDataAddTime.begin(), BloomDataAddTime.end());

	std::cout << "最低値" << std::endl;
	std::cout << "AddTime : "<< BloomDataAddTime.front() << std::endl;
	std::cout << "AddSize : " << BloomDataSize.front() << std::endl;

	std::cout << "最高値" << std::endl;
	std::cout << "AddTime : " << BloomDataAddTime.back() << std::endl;
	std::cout << "AddSize : " << BloomDataSize.back() << std::endl;

	std::cout << "中央値" << std::endl;
	if (BloomDataAddTime.size() % 2 == 0)
	{
		int pos = BloomDataAddTime.size() / 2;
		std::cout << "AddTime : " << (BloomDataAddTime[pos] + BloomDataAddTime[pos + 1]) / 2 << std::endl;
	}
	else
	{
		std::cout << "AddTime : " << BloomDataAddTime[(int)(BloomDataAddTime.size() / 2+1)]<< std::endl;
	}
	if (BloomDataSize.size() % 2 == 0)
	{
		int pos = BloomDataSize.size() / 2;
		std::cout << "AddSize : " << (BloomDataSize[pos] + BloomDataSize[pos + 1]) / 2 << std::endl;
	}
	else
	{
		std::cout << "AddSize : " << BloomDataSize[(int) (BloomDataSize.size() / 2 + 1)] << std::endl;
	}

	std::cout << "平均値" << std::endl;
	long long Average = 0;
	for (auto Time : BloomDataAddTime)
	{
		Average += Time;
	}
	std::cout << "AddTime : " << (float) Average / BloomDataAddTime.size() << std::endl;
	Average = 0;
	for (auto Time : BloomDataSize)
	{
		Average += Time;
	}
	
	std::cout << "データサイズ" << std::endl;
	std::sort(BloomDataSize.begin(), BloomDataSize.end());
	std::cout << "AddSize : " << (float) Average / BloomDataSize.size() << std::endl;

	std::cout << "データ削除" << std::endl;
	std::sort(BloomDataSubTime.begin(), BloomDataSubTime.end());

	std::cout << "最低値" << std::endl;
	std::cout << "SubTime : " << BloomDataSubTime.front() << std::endl;

	std::cout << "最高値" << std::endl;
	std::cout << "SubTime : " << BloomDataSubTime.back() << std::endl;

	std::cout << "中央値" << std::endl;
	if (BloomDataSubTime.size() % 2 == 0)
	{
		int pos = BloomDataSubTime.size() / 2;
		std::cout << "SubTime : " << (BloomDataSubTime[pos] + BloomDataSubTime[pos + 1]) / 2 << std::endl;
	}
	else
	{
		std::cout << "SubTime : " << BloomDataSubTime[(int) (BloomDataSubTime.size() / 2 + 1)] << std::endl;
	}

	std::cout << "平均値" << std::endl;
	Average = 0;
	for (auto Time : BloomDataSubTime)
	{
		Average += Time;
	}
	std::cout << "SubTime : " << (float) Average / BloomDataSubTime.size() << std::endl;
	std::cout << std::endl;
	
	
	vector<long long> CountingDataAddTime;
	vector<long long> CountingDataSubTime;
	vector<unsigned long long> CountingDataSize;
	std::cout << "カウンティングフィルタテスト" << std::endl;
	std::cout << "測定開始" << std::endl;
	for (int count = 1; count <= 100; count++)
	{
		CountingFilter CF;
		auto Start = std::chrono::system_clock::now();
		for (int i = 0; i < 100; i++)
		{
			CF.AddText(to_string(i));
			std::cout << CF.GetFilter() << std::endl;
		}
		auto End = std::chrono::system_clock::now();
		auto Time = End - Start;
		auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(Time).count();
		CountingDataAddTime.push_back(ms);
		CountingDataSize.push_back(sizeof(CF));

		Start = std::chrono::system_clock::now();
		for (int i = 0; i < 100; i++)
		{
			CF.SubText(to_string(i));
			std::cout << CF.GetFilter() << std::endl;
		}
		End = std::chrono::system_clock::now();
		Time = End - Start;
		ms = std::chrono::duration_cast<std::chrono::milliseconds>(Time).count();
		CountingDataSubTime.push_back(ms);
	}
	std::cout << "測定終了" << std::endl;

	std::cout << "データ追加" << std::endl;
	std::sort(CountingDataAddTime.begin(), CountingDataAddTime.end());

	std::cout << "最低値" << std::endl;
	std::cout << "AddTime : " << CountingDataAddTime.front() << std::endl;
	std::cout << "AddSize : " << CountingDataSize.front() << std::endl;

	std::cout << "最高値" << std::endl;
	std::cout << "AddTime : " << CountingDataAddTime.back() << std::endl;
	std::cout << "AddSize : " << CountingDataSize.back() << std::endl;

	std::cout << "中央値" << std::endl;
	if (CountingDataAddTime.size() % 2 == 0)
	{
		int pos = CountingDataAddTime.size() / 2;
		std::cout << "AddTime : " << (CountingDataAddTime[pos] + CountingDataAddTime[pos + 1]) / 2 << std::endl;
	}
	else
	{
		std::cout << "AddTime : " << CountingDataAddTime[(int) (CountingDataAddTime.size() / 2 + 1)] << std::endl;
	}
	if (CountingDataSize.size() % 2 == 0)
	{
		int pos = CountingDataSize.size() / 2;
		std::cout << "AddSize : " << (CountingDataSize[pos] + CountingDataSize[pos + 1]) / 2 << std::endl;
	}
	else
	{
		std::cout << "AddSize : " << CountingDataSize[(int) (CountingDataSize.size() / 2 + 1)] << std::endl;
	}

	std::cout << "平均値" << std::endl;
	Average = 0;
	for (auto Time : CountingDataAddTime)
	{
		Average += Time;
	}
	std::cout << "AddTime : " << (float) Average / CountingDataAddTime.size() << std::endl;
	Average = 0;
	for (auto Time : CountingDataSize)
	{
		Average += Time;
	}

	std::cout << "データサイズ" << std::endl;
	std::sort(CountingDataSize.begin(), CountingDataSize.end());
	std::cout << "AddSize : " << (float) Average / CountingDataSize.size() << std::endl;

	std::cout << "データ削除" << std::endl;
	std::sort(CountingDataSubTime.begin(), CountingDataSubTime.end());

	std::cout << "最低値" << std::endl;
	std::cout << "SubTime : " << CountingDataSubTime.front() << std::endl;

	std::cout << "最高値" << std::endl;
	std::cout << "SubTime : " << CountingDataSubTime.back() << std::endl;

	std::cout << "中央値" << std::endl;
	if (CountingDataSubTime.size() % 2 == 0)
	{
		int pos = CountingDataSubTime.size() / 2;
		std::cout << "SubTime : " << (CountingDataSubTime[pos] + CountingDataSubTime[pos + 1]) / 2 << std::endl;
	}
	else
	{
		std::cout << "SubTime : " << CountingDataSubTime[(int) (CountingDataSubTime.size() / 2 + 1)] << std::endl;
	}

	std::cout << "平均値" << std::endl;
	Average = 0;
	for (auto Time : CountingDataSubTime)
	{
		Average += Time;
	}
	std::cout << "SubTime : " << (float) Average / CountingDataSubTime.size() << std::endl;
	
	getchar();
	return 0;
}
Esempio n. 23
0
// Inserts into `myFilter` one entry per window of opt::kmerLen characters of
// `seq`. Each insert receives a pointer to the window's first character
// inside the sequence's buffer.
void loadSeq(BloomFilter & myFilter, const string & seq) {
    // A sequence shorter than one k-mer has nothing to insert.
    if (seq.size() < opt::kmerLen) {
        return;
    }

    size_t start = 0;
    while (start < seq.size() - opt::kmerLen + 1) {
        myFilter.insert(seq.c_str() + start);
        ++start;
    }
}
Esempio n. 24
0
// BloomFilter test driver.
//
// argv[1] is the bit count passed to the BloomFilter constructor. The
// program inserts the integers [0, limit), asserts that all of them are
// found, counts how many of [limit, 2*limit) are reported as present, runs a
// few Kmer insert/contains checks, writes the filter to "testBloom.bf" and
// prints the measured false-positive ratio.
// Returns 1 on a missing argument or if the output file cannot be opened.
int main(int argc, char *argv[]) {
  if (argc < 2) {
    cout << "usage: BloomFilterTest <bits>\n where bits is the bit count per variable" << endl;
    return 1;
  }
  unsigned int bits = atoi(argv[1]), wrong = 0, counter = 0;
  cout << "bits=" << bits << endl;
  int limit = 1000000;

  // take bits as parameter
  BloomFilter BF (limit, (size_t) bits, (uint32_t) time(NULL));



  // insert ints 0,...,1M
  for (int j=0; j<limit;j++)
    BF.insert(j);

  // assert 0,...,1M
  for (int j=0; j<limit;j++)
    assert(BF.contains(j));
  
  // None of these were inserted, so every hit is a false positive.
  for (int j=limit; j < 2*limit ;j++) {
    if (BF.contains(j))
      wrong++;
    counter++;
  }

  Kmer::set_k(31);
  Kmer km("ACGTACGTACGTACGTACGTACGTACGTACG");
  printf("sizeof(Kmer) == %d\n", (int) sizeof(Kmer));
  printf("sizeof(km) == %d\n", (int) sizeof(km));
  printf("pointer cast == %p\n", (const void*) &km);

  cout << "Count == " << BF.count() << endl;
  cout << "Contains(km) == " << BF.contains(km) << endl;
  cout << "Insert(km) == " << BF.insert(km) << endl;
  BF.count();
  cout << "Contains(km) == " << BF.contains(km) << endl;
  assert(BF.contains(km));
  
  BF.count();
  Kmer km2("ACGTACGTACGTACGTACGTACGTACGTAGG"); // AGG vs ACG in the end
  cout << "Contains(km2) == " << BF.contains(km2) << endl;
  cout << "Insert(km2) == " << BF.insert(km2) << endl;
  cout << "Contains(km2) == " << BF.contains(km2) << endl;
  assert(BF.contains(km2));

  BF.count();
  
  FILE *fp = fopen("testBloom.bf", "wb");
  // fopen returns NULL on failure; writing to or closing a NULL stream is
  // undefined behaviour, so bail out instead.
  if (fp == NULL) {
    cout << "could not open testBloom.bf for writing" << endl;
    return 1;
  }
  BF.WriteBloomFilter(fp);
  fclose(fp);



  // compute false positive rate
  printf("False positive ratio: %.6f\n", wrong / (0.0 + counter));
  // NOTE(review): &argv[0][2] skips the first two characters of the program
  // name (presumably a leading "./") - confirm this is always valid.
  cout << &argv[0][2] << " completed successfully" << endl;
  return 0;
}
Esempio n. 25
0
// Test driver for BloomFilter<12, FilterChecker>.
//
// Checks add / mayContain / remove / clear, including the expected false
// positives between the FilterChecker values and the behaviour after one
// entry has been added more than FILTER_SIZE times.
// Returns 0 when every check passes; on the first failing check it reports
// the problem through fail() and returns -1.
int main()
{
  // Automatic storage: the filter is released on every return path below.
  BloomFilter<12, FilterChecker> filter{};

  FilterChecker one(1);
  FilterChecker two(0x20000);
  FilterChecker many(0x10000);
  FilterChecker multiple(0x20001);

  filter.add(&one);
  if (!filter.mayContain(&one)) {
    fail("Filter should contain 'one'");
    return -1;
  }

  if (filter.mayContain(&multiple)) {
    fail("Filter claims to contain 'multiple' when it should not");
    return -1;
  }

  if (!filter.mayContain(&many)) {
    fail("Filter should contain 'many' (false positive)");
    return -1;
  }

  filter.add(&two);
  if (!filter.mayContain(&multiple)) {
    fail("Filter should contain 'multiple' (false positive)");
    return -1;
  }

  // Test basic removals
  filter.remove(&two);
  if (filter.mayContain(&multiple)) {
    fail("Filter claims to contain 'multiple' when it should not after two was "
         "removed");
    return -1;
  }

  // Test multiple addition/removal
  const unsigned FILTER_SIZE = 255;
  for (unsigned i = 0; i < FILTER_SIZE - 1; ++i) {
    filter.add(&two);
  }
  if (!filter.mayContain(&multiple)) {
    fail("Filter should contain 'multiple' after 'two' added lots of times "
         "(false positive)");
    return -1;
  }
  for (unsigned i = 0; i < FILTER_SIZE - 1; ++i) {
    filter.remove(&two);
  }
  if (filter.mayContain(&multiple)) {
    fail("Filter claims to contain 'multiple' when it should not after two was "
         "removed lots of times");
    return -1;
  }

  // Test overflowing the filter buckets
  for (unsigned i = 0; i < FILTER_SIZE + 1; ++i) {
    filter.add(&two);
  }
  if (!filter.mayContain(&multiple)) {
    fail("Filter should contain 'multiple' after 'two' added lots more times "
         "(false positive)");
    return -1;
  }
  for (unsigned i = 0; i < FILTER_SIZE + 1; ++i) {
    filter.remove(&two);
  }
  if (!filter.mayContain(&multiple)) {
    fail("Filter claims to not contain 'multiple' even though we should have "
         "run out of space in the buckets (false positive)");
    return -1;
  }
  if (!filter.mayContain(&two)) {
    fail("Filter claims to not contain 'two' even though we should have run "
         "out of space in the buckets (false positive)");
    return -1;
  }

  filter.remove(&one);
  if (filter.mayContain(&one)) {
    fail("Filter should not contain 'one', because we didn't overflow its "
         "bucket");
    return -1;
  }
  
  filter.clear();
  if (filter.mayContain(&multiple)) {
    fail("clear() failed to work");
    return -1;
  }

  return 0;
}