/*
 * CountMinSketchAdd
 *
 * Adds `count` occurrences of the `size`-byte item at `key` to the sketch.
 *
 * The item is hashed once with MurmurHash3_128; the two 64-bit halves of the
 * digest are combined as hash[0] + i * hash[1] to pick one column in each of
 * the cms->d rows (each row being cms->w cells wide).
 *
 * Since we only have positive increments, we're using the conservative update
 * variant which apparently has better accuracy--albeit moderately slower.
 *
 * http://dimacs.rutgers.edu/~graham/pubs/papers/cmencyc.pdf
 */
void
CountMinSketchAdd(CountMinSketch *cms, void *key, Size size, uint32_t count)
{
	uint32_t min = UINT_MAX;
	uint32_t target;
	uint32_t i;
	uint64_t hash[2];

	MurmurHash3_128(key, size, MURMUR_SEED, &hash);

	/* First pass: find the smallest counter among the cells this key maps to. */
	for (i = 0; i < cms->d; i++)
	{
		uint32_t start = i * cms->w;
		uint32_t j = (hash[0] + (i * hash[1])) % cms->w;

		min = Min(min, cms->table[start + j]);
	}

	/*
	 * Saturate instead of wrapping: if min + count overflowed, the wrapped
	 * value would be small, Max() below would leave every cell untouched and
	 * the increment would be silently dropped.
	 */
	if (count > UINT_MAX - min)
		target = UINT_MAX;
	else
		target = min + count;

	/* Second pass: raise each mapped cell to at least the new target value. */
	for (i = 0; i < cms->d; i++)
	{
		uint32_t start = i * cms->w;
		uint32_t j = (hash[0] + (i * hash[1])) % cms->w;

		cms->table[start + j] = Max(cms->table[start + j], target);
	}

	cms->count += count;
}
//----------------------------------------------------------------------------- String ProgramManager::generateHash(const String& programString) { //Different programs must have unique hash values. uint32_t hash[4]; MurmurHash3_128(programString.c_str(), programString.size(), 0, hash); //Generate the string char str[33]; sprintf(str, "%08x%08x%08x%08x", hash[0], hash[1], hash[2], hash[3]); return String(str); }
/*
 * Tests whether every bit that `word` maps to is set in the filter.
 *
 * The hashes are chained: the first round is seeded with bf->seed and each
 * later round is seeded with the previous round's digest. Each digest is
 * reduced modulo bf->num_bits to pick the bit to test.
 *
 * Returns FALSE as soon as one mapped bit is found clear, TRUE if all
 * bf->num_hash_functions bits are set. Calls PrintThenDie if hashing fails.
 */
Bool CheckKmerInBloomFilter(BloomFilter* const bf, const Kmer word)
{
  uint128_t chained_seed = bf->seed;
  uint128_t digest;
  uint round = 0;

  while (round < bf->num_hash_functions) {
    if (MurmurHash3_128((char*)&word, sizeof(Kmer), chained_seed, &digest) == FALSE) {
      PrintThenDie("could not ascertain hash for kmer");
    }

    /* A single clear bit is enough to rule the kmer out. */
    if (CheckBit(bf->bs, digest % bf->num_bits) == 0) {
      return FALSE;
    }

    /* The next round is seeded with this round's digest. */
    chained_seed = digest;
    round++;
  }

  return TRUE;
}
/*
 * CountMinSketchEstimateCount
 *
 * Returns the estimated count for the `size`-byte item at `key`: the minimum
 * of the counters the item maps to, one per row of the sketch.
 *
 * The column in row r is (hash[0] + r * hash[1]) % cms->w, where hash is the
 * 128-bit MurmurHash3 digest of the key. If the sketch has no rows
 * (cms->d == 0) the initial value UINT_MAX is returned unchanged.
 */
uint32_t
CountMinSketchEstimateCount(CountMinSketch *cms, void *key, Size size)
{
	uint64_t hash[2];
	uint32_t estimate = UINT_MAX;
	uint32_t row;

	MurmurHash3_128(key, size, MURMUR_SEED, &hash);

	for (row = 0; row < cms->d; row++)
	{
		/* Offset of this row's first cell in the flat table. */
		uint32_t row_offset = row * cms->w;
		/* Column selected for this key within the row. */
		uint32_t column = (hash[0] + (row * hash[1])) % cms->w;

		estimate = Min(estimate, cms->table[row_offset + column]);
	}

	return estimate;
}
/*
 * Inserts `word` into the filter by setting one bit per hash function.
 *
 * The hashes are chained: the first round is seeded with bf->seed and each
 * later round is seeded with the previous round's digest. Each digest is
 * reduced modulo bf->num_bits to pick the bit to set.
 *
 * bf->num_set_bits is incremented for every SetBit call that returns FALSE,
 * and bf->num_entries_added is incremented once if that happened in at least
 * one round. Calls PrintThenDie if hashing fails.
 */
void AddKmerToBloomFilter(BloomFilter* const bf, const Kmer word)
{
  uint128_t chained_seed = bf->seed;
  uint128_t digest;
  Bool touched_new_bit = FALSE;
  uint round;

  for (round = 0; round < bf->num_hash_functions; round++) {
    if (MurmurHash3_128((char*)&word, sizeof(Kmer), chained_seed, &digest) == FALSE) {
      PrintThenDie("could not ascertain hash for kmer");
    }

    const uint128_t bit_index = digest % bf->num_bits;

    /* NOTE(review): SetBit presumably returns the bit's previous state, so
     * FALSE would mean the bit was newly set -- confirm against SetBit. */
    if (SetBit(bf->bs, bit_index) == FALSE) {
      assert(CheckBit(bf->bs, bit_index) > 0);
      bf->num_set_bits += 1;
      touched_new_bit = TRUE;
    }

    /* The next round is seeded with this round's digest. */
    chained_seed = digest;
  }

  if (touched_new_bit == TRUE) {
    bf->num_entries_added += 1;
  }
}