// constructor CountDistinctSketch(float ep = 0.01, float gamm = 0.90, int bucket = 7) { eps = ep; gamma = gamm; w = ceil(exp(1) / eps); d = ceil(log(1 / gamma)); total = 0; buck = bucket; // initialize counter array of arrays, C C.resize(d); unsigned int i, j; for (i = 0; i < d; i++) { C[i].resize(w); for (j = 0; j < w; j++) { HyperLogLog hll(buck); C[i][j] = hll; } } // initialize d pairwise independent hashes srand(time(NULL)); hashes = new int*[d]; for (i = 0; i < d; i++) { hashes[i] = new int[2]; genajbj(hashes, i); } }
void Bencher::go64(int num){ uint64_t hash; uint64_t hash128[2]; vector < uint64_t > hashed = vector<uint64_t>(); cout << "hashing ..." << endl; for(uint64_t i = 0; i < num ; i++){ MurmurHash3_x86_128(&i, 8, 0, &hash128); hash = hash128[0]; hashed.push_back(hash); } Hll hll(14); cout << "go" << endl; auto begin = chrono::high_resolution_clock::now(); for(uint64_t i = 0; i < num ; i++){ hll.AddItem64(hashed[i]); } double n = hll.Count64(); auto end = chrono::high_resolution_clock::now(); auto dur = end - begin; auto temps = std::chrono::duration_cast<std::chrono::milliseconds>(dur).count(); cout << n << endl << "temps : " << temps << "ms" << endl; }
/**
 * Runs TESTS independent Hll(14) experiments and writes bias-corrected
 * estimates to `filename`.
 *
 * In every run CARDMAX hashed values are inserted. The value counter `ca` is
 * never reset, so each run hashes values not used by earlier runs. Every
 * STEP insertions the raw estimate (CountRaw64) is sampled and corrected by
 * linear interpolation against the table returned by makeVector(), whose
 * entry k is treated as the raw estimate at cardinality k * STEP. A sample
 * that falls in no bracket of the table is stored uncorrected.
 *
 * Output: one tab-separated line per sampled cardinality containing
 *   cardinality, mean over runs, percentile(.., 0.5), percentile(.., 0.01),
 *   percentile(.., 0.99).
 *
 * @param filename Path of the output file.
 */
void Ploter::plotCorrectedHll(string filename){
    ofstream file(filename);
    uint64_t hash128[2];
    vector<double> estimates = makeVector();

    // tab[s][t] = corrected estimate of run t at sample point s.
    vector< vector<double> > tab(CARDMAX/STEP, vector<double>(TESTS));

    int ca = 0;
    for (int i = 0; i < TESTS; i++) {
        cout << i << endl;
        Hll hll(14);
        for (int j = 0; j < CARDMAX; j++) {
            MurmurHash3_x86_128(&ca, 4, 0, &hash128);
            ca++;
            hll.AddItem64(hash128[0]);

            if (j % STEP == 0) {
                double count = hll.CountRaw64();

                // Find the pair of reference estimates bracketing the raw
                // count and map it back to the cardinality axis.
                for (int k = 0; k < CARDMAX/STEP-1; k++) {
                    if (estimates[k] <= count && count < estimates[k+1]) {
                        // Anchors stay double so the interpolation uses the
                        // same values the bracket test compared against.
                        double x1 = estimates[k];
                        double y1 = k*STEP;
                        double x2 = estimates[k+1];
                        double y2 = (k+1)*STEP;
                        count = interpolation(count, x1, y1, x2, y2);
                        // Stop here: `count` now holds the corrected value
                        // and must not be matched against later brackets.
                        break;
                    }
                }

                tab[j/STEP][i] = count;
            }
        }
    }

    // Aggregate across runs for every sample point.
    for (int j = 0; j < CARDMAX/STEP; j++) {
        double sum = 0;
        for (int k = 0; k < TESTS; k++) {
            sum = sum + tab[j][k];
        }
        double median = percentile(tab[j],0.5);
        double pct01 = percentile(tab[j],0.01);
        double pct99 = percentile(tab[j],0.99);
        file << (j*STEP) << "\t" << (double)sum/TESTS << "\t" << (double)median << "\t" << pct01 << "\t" << pct99 << endl;
    }
}
/**
 * Runs TESTS independent Hll(14) experiments and writes the uncorrected
 * estimates to `filename`.
 *
 * Each run inserts CARDMAX hashed values; the value counter keeps increasing
 * across runs, so no run re-hashes a value used by an earlier one. Every
 * STEP insertions the result of CountRaw64 is recorded as-is.
 *
 * Output: one tab-separated line per sampled cardinality containing
 *   cardinality, mean over runs, percentile(.., 0.5), percentile(.., 0.01),
 *   percentile(.., 0.99).
 *
 * @param filename Path of the output file.
 */
void Ploter::plotRawHll(string filename){
    ofstream file(filename);

    // samples[s][t] = raw estimate of run t at sample point s.
    vector< vector<double> > samples(CARDMAX/STEP, vector<double>(TESTS));

    uint64_t digest[2];
    int nextValue = 0;

    for (int run = 0; run < TESTS; run++) {
        cout << run << endl;
        Hll sketch(14);

        for (int inserted = 0; inserted < CARDMAX; inserted++) {
            // Hash the 4 bytes of the counter and feed the first 64-bit
            // word of the digest to the sketch.
            MurmurHash3_x86_128(&nextValue, 4, 0, &digest);
            nextValue++;
            sketch.AddItem64(digest[0]);

            if (inserted % STEP != 0) {
                continue;
            }
            samples[inserted/STEP][run] = sketch.CountRaw64();
        }
    }

    // Aggregate across runs for every sample point.
    for (int point = 0; point < CARDMAX/STEP; point++) {
        double total = 0;
        for (int run = 0; run < TESTS; run++) {
            total += samples[point][run];
        }

        double median = percentile(samples[point], 0.5);
        double pct01 = percentile(samples[point], 0.01);
        double pct99 = percentile(samples[point], 0.99);

        file << (point*STEP) << "\t"
             << total/TESTS << "\t"
             << median << "\t"
             << pct01 << "\t"
             << pct99 << endl;
    }
}
// ----------------------------------------------------------------------------- bool Algorithms::Test_HashLinkedListSearch() { bool pass = true; HashLinkedList hll(10); for (int i = -50; i < 50; ++i) { pass = pass && !hll.Search(i); pass = pass && hll.Insert(i); pass = pass && hll.Search(i); pass = pass && hll.Delete(i); pass = pass && !hll.Search(i); } for (int i = -50; i < 50; ++i) { pass = pass && hll.Insert(i); } for (int i = -50; i < 50; ++i) { pass = pass && hll.Search(i); } return pass; }