void dablooms_hash_func(counting_bloom_t *bloom, const char *key, size_t key_len,
                        uint32_t *hashes)
{
    int i;
    uint32_t checksum[4];

    MurmurHash3_x64_128(key, key_len, SALT_CONSTANT, checksum);
    uint32_t h1 = checksum[0];
    uint32_t h2 = checksum[1];

    for (i = 0; i < bloom->nfuncs; i++) {
        hashes[i] = (h1 + i * h2) % bloom->counts_per_func;
    }
}
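/*
 * A minimal, self-contained sketch of the double-hashing scheme used above
 * (Kirsch & Mitzenmacher: g_i(x) = h1(x) + i * h2(x) mod m). Everything here
 * is illustrative; demo_double_hash is a hypothetical helper, not part of
 * dablooms.
 */
#include <stdint.h>

static void demo_double_hash(uint32_t h1, uint32_t h2, uint32_t m,
                             uint32_t *out, int k)
{
    /* Derive k index values from just two base hashes. */
    for (int i = 0; i < k; i++) {
        out[i] = (h1 + (uint32_t)i * h2) % m;
    }
}
/* E.g. with h1 = 7, h2 = 11, m = 100, k = 3 this yields 7, 18, 29. */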
/**
 * Adds a new key to the set
 * @arg s The set to add to
 * @arg key The key to add
 */
void set_add(set_t *s, char *key) {
    uint32_t i;
    uint64_t out[2];
    MurmurHash3_x64_128(key, strlen(key), 0, out);
    switch (s->type) {
        case EXACT:
            // Check if this element is already added
            for (i = 0; i < s->store.s.count; i++) {
                if (out[1] == s->store.s.hashes[i])
                    return;
            }
            // Check if we can fit this in the array
            if (i < SET_MAX_EXACT) {
                s->store.s.hashes[i] = out[1];
                s->store.s.count++;
                return;
            }
            // Otherwise, force conversion to HLL
            // and purposely fall through to add the
            // element to the HLL
            convert_exact_to_approx(s);
        case APPROX:
            hll_add_hash(&s->store.h, out[1]);
            break;
    }
}
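/*
 * convert_exact_to_approx is referenced but not shown above. A plausible
 * sketch, assuming the field names from set_add and an hll_init helper with
 * this shape; the real implementation may well differ:
 */
static void convert_exact_to_approx(set_t *s) {
    // Copy the exact hashes aside, since store is assumed to be a union
    uint64_t hashes[SET_MAX_EXACT];
    uint32_t count = s->store.s.count;
    memcpy(hashes, s->store.s.hashes, count * sizeof(uint64_t));

    // Switch the set over to the approximate (HLL) representation
    s->type = APPROX;
    hll_init(&s->store.h);

    // Replay every previously stored hash into the HLL
    for (uint32_t i = 0; i < count; i++) {
        hll_add_hash(&s->store.h, hashes[i]);
    }
}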
// 128-bit
template<>
MurmurHash<128>::result_type MurmurHash<128>::salt(bool fixed) {
    if (fixed) {
        result_type ret;
        uint32_t* p = (uint32_t*)&ret;
        (*p++) = 0x58132134;
        (*p++) = 0x94827513;
        (*p++) = 0x16574893;
        (*p) = 0x17932864;
        return ret;
    }
    result_type h;
    uint32_t* p0 = (uint32_t*)&h;
    std::time_t now = std::time(nullptr);
    char* nowstr = std::ctime(&now);
    MurmurHash3_x86_32((void*)nowstr, (int)std::strlen(nowstr), 0, (void*)p0);
    std::vector<zks::u8string> mac_addrs = get_mac_address();
    for (size_t i = 0; i < mac_addrs.size(); ++i) {
#ifdef _ZKS64
        MurmurHash3_x64_128((void*)mac_addrs[i].data(), (int)mac_addrs[i].size(), *p0, (void*)&h);
#else
        MurmurHash3_x86_128((void*)mac_addrs[i].data(), (int)mac_addrs[i].size(), *p0, (void*)&h);
#endif
    }
    return h;
}
static avl_unit_val_t *qp_avl_lookup_s(struct avltree *t, avl_unit_val_t *v,
                                       int maxj)
{
    struct avltree_node *node;
    avl_unit_val_t *v2;
    uint32_t j;
    uint32_t hk[4];

    assert(avltree_size(t) < UINT64_MAX);

    MurmurHash3_x64_128(v->name, strlen(v->name), 67, hk);
    memcpy(&v->hk.k, hk, 8);

    for (j = 0; j < maxj; j++) {
        v->hk.k = (v->hk.k + (j * 2));
        node = avltree_inline_lookup(&v->node_hk, t);
        if (node) {
            /* it's almost but not entirely certain that node is
             * related to v. in the general case, j is also not
             * constrained to be v->hk.p */
            v2 = avltree_container_of(node, avl_unit_val_t, node_hk);
            if (!strcmp(v->name, v2->name))
                return v2;
        }
    }

    /* warn crit */
    return NULL;
}
/*
 * CmsTopnEstimateItemFrequency calculates estimated frequency for the given
 * item and returns it.
 */
static Frequency
CmsTopnEstimateItemFrequency(CmsTopn *cmsTopn, Datum item,
                             TypeCacheEntry *itemTypeCacheEntry)
{
    uint64 hashValueArray[2] = {0, 0};
    StringInfo itemString = makeStringInfo();
    Frequency frequency = 0;

    /* if datum is toasted, detoast it */
    if (itemTypeCacheEntry->typlen == -1)
    {
        Datum detoastedItem = PointerGetDatum(PG_DETOAST_DATUM(item));
        ConvertDatumToBytes(detoastedItem, itemTypeCacheEntry, itemString);
    }
    else
    {
        ConvertDatumToBytes(item, itemTypeCacheEntry, itemString);
    }

    /*
     * Calculate hash values for the given item and then get frequency estimate
     * with these hashed values.
     */
    MurmurHash3_x64_128(itemString->data, itemString->len, MURMUR_SEED,
                        hashValueArray);
    frequency = CmsTopnEstimateHashedItemFrequency(cmsTopn, hashValueArray);

    return frequency;
}
template<>
MurmurHash<64>::result_type MurmurHash<64>::hash(const void* key, size_t n,
                                                 result_type seed) {
    uint64_t res[2];
    uint32_t* s = (uint32_t*)&seed;
    // The x64 variant always produces 128 bits; keep the low half as the result.
    MurmurHash3_x64_128(key, (int)n, *s, (void*)res);
    return res[0];
}
static PyObject *
mmh3_hash128(PyObject *self, PyObject *args, PyObject *keywds)
{
    const char *target_str;
    int target_str_len;
    uint32_t seed = 0;
    uint64_t result[2];
    char x64arch = 1;
    static char *kwlist[] = {(char *)"key", (char *)"seed", (char *)"x64arch",
                             NULL};

    if (!PyArg_ParseTupleAndKeywords(args, keywds, "s#|iB", kwlist,
                                     &target_str, &target_str_len, &seed,
                                     &x64arch)) {
        return NULL;
    }

    if (x64arch == 1) {
        MurmurHash3_x64_128(target_str, target_str_len, seed, result);
    } else {
        MurmurHash3_x86_128(target_str, target_str_len, seed, result);
    }

    /**
     * _PyLong_FromByteArray is not part of the official Python/C API and may
     * change or be removed (although it has been stable in practice). cf.
     * https://mail.python.org/pipermail/python-list/2006-August/372368.html
     */
    PyObject *retval = _PyLong_FromByteArray((unsigned char *)result, 16, 1, 0);
    return retval;
}
static PyObject *
mmh3_hash64(PyObject *self, PyObject *args, PyObject *keywds)
{
    const char *target_str;
    int target_str_len;
    uint32_t seed = 0;
    int64_t result[2];
    char x64arch = 1;
    static char *kwlist[] = {(char *)"key", (char *)"seed", (char *)"x64arch",
                             NULL};

    if (!PyArg_ParseTupleAndKeywords(args, keywds, "s#|iB", kwlist,
                                     &target_str, &target_str_len, &seed,
                                     &x64arch)) {
        return NULL;
    }

    if (x64arch == 1) {
        MurmurHash3_x64_128(target_str, target_str_len, seed, result);
    } else {
        MurmurHash3_x86_128(target_str, target_str_len, seed, result);
    }

    /* "L" (long long) matches int64_t even where long is 32-bit. */
    PyObject *retval = Py_BuildValue("LL", result[0], result[1]);
    return retval;
}
static PyObject *
mmh3_hash_bytes(PyObject *self, PyObject *args, PyObject *keywds)
{
    const char *target_str;
    int target_str_len;
    uint32_t seed = 0;
    uint32_t result[4];
    char x64arch = 1;
    static char *kwlist[] = {(char *)"key", (char *)"seed", (char *)"x64arch",
                             NULL};

    if (!PyArg_ParseTupleAndKeywords(args, keywds, "s#|iB", kwlist,
                                     &target_str, &target_str_len, &seed,
                                     &x64arch)) {
        return NULL;
    }

    if (x64arch == 1) {
        MurmurHash3_x64_128(target_str, target_str_len, seed, result);
    } else {
        MurmurHash3_x86_128(target_str, target_str_len, seed, result);
    }

    char bytes[16];
    memcpy(bytes, result, 16);
    return PyBytes_FromStringAndSize(bytes, 16);
}
hash_u getHash(const char * seq, int length)
{
    //for ( int i = 0; i < length; i++ ) { cout << *(seq + i); } cout << endl;

    bool use64 = length > 16;
    // MurmurHash3_x64_128 always writes 16 bytes, so the buffer must hold the
    // full 128-bit digest even though only 4 or 8 bytes are consumed below.
    char data[16];

#ifdef ARCH_32
    MurmurHash3_x86_32(seq, length > 16 ? 16 : length, seed, data);
    if ( use64 )
    {
        MurmurHash3_x86_32(seq + 16, length - 16, seed, data + 4);
    }
#else
    MurmurHash3_x64_128(seq, length, seed, data);
#endif

    hash_u hash;

    if ( use64 )
    {
        hash.hash64 = *((hash64_t *)data);
    }
    else
    {
        hash.hash32 = *((hash32_t *)data);
    }

    return hash;
}
void Hash::append(const Hash& other) {
    std::ostringstream stream;
    stream << _hash[0] << "_" << _hash[1] << other._hash[0] << "_" << other._hash[1];
    // Materialize the string once instead of creating several temporaries.
    const std::string combined = stream.str();
    MurmurHash3_x64_128(combined.c_str(), (int32_t)combined.length(), 0,
                        &_hash[0]);
}
template<>
MurmurHash<256>::result_type MurmurHash<256>::hash(const void* key, size_t n,
                                                   result_type seed) {
    result_type h;
    uint32_t* ph = (uint32_t*)&h;
    uint32_t* ps = (uint32_t*)&seed;
    // Fill the 256-bit result with two 128-bit hashes under different
    // 32-bit words of the seed.
#ifdef _ZKS64
    MurmurHash3_x64_128(key, (int)n, *(ps), (void*)ph);
    ++ps;
    ph += 4;
    MurmurHash3_x64_128(key, (int)n, *(ps), (void*)ph);
#else
    MurmurHash3_x86_128(key, (int)n, *(ps), (void*)ph);
    ++ps;
    ph += 4;
    MurmurHash3_x86_128(key, (int)n, *(ps), (void*)ph);
#endif
    return h;
}
/**
 * Adds a new key to the HLL
 * @arg h The hll to add to
 * @arg key The key to add
 */
void hll_add(hll_t *h, char *key) {
    // Compute the hash value of the key
    uint64_t out[2];
    MurmurHash3_x64_128(key, strlen(key), 0, out);

    // Add the hashed value
    hll_add_hash(h, out[1]);
}
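/*
 * hll_add_hash is not shown above. A minimal sketch of the standard
 * HyperLogLog register update, assuming 2^precision registers in
 * h->registers (both field names are assumptions); the library's real
 * routine likely differs in layout and bias handling:
 */
#include <stdint.h>

static void hll_add_hash_sketch(hll_t *h, uint64_t hash) {
    // The high `precision` bits select the register...
    uint32_t idx = (uint32_t)(hash >> (64 - h->precision));

    // ...and the rank is the position of the first 1-bit in the rest.
    uint64_t rest = hash << h->precision;
    uint8_t rank = rest ? (uint8_t)(__builtin_clzll(rest) + 1)
                        : (uint8_t)(64 - h->precision + 1);

    // Each register keeps the maximum rank it has observed.
    if (rank > h->registers[idx])
        h->registers[idx] = rank;
}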
// Ref: Adam Kirsch and Michael Mitzenmacher
// Less Hashing, Same Performance: Building a Better Bloom Filter
void bloom_filter_t::generate_indexes(const void *key, size_t len) const {
    uint64_t hash[2];
    MurmurHash3_x64_128(key, len, seed_, hash);
    for (size_t i = 0; i < num_hashes_; i++)
        indexes_[i] = (hash[0] + i * hash[1]) % num_buckets_;
}
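/*
 * How such indexes are typically consumed, as a self-contained sketch.
 * All names below are hypothetical; only MurmurHash3_x64_128 is assumed
 * from the snippets above.
 */
#include <cstdint>
#include <vector>

static void demo_indexes(const void *key, size_t len, uint32_t seed,
                         size_t k, size_t m, std::vector<size_t> &out) {
    uint64_t hash[2];
    MurmurHash3_x64_128(key, (int)len, seed, hash);
    out.clear();
    for (size_t i = 0; i < k; i++)
        out.push_back((hash[0] + i * hash[1]) % m);  // Kirsch-Mitzenmacher
}

static void demo_add(std::vector<bool> &bits, const std::vector<size_t> &idx) {
    for (size_t i : idx)
        bits[i] = true;              // set every derived position
}

static bool demo_test(const std::vector<bool> &bits,
                      const std::vector<size_t> &idx) {
    for (size_t i : idx)
        if (!bits[i])
            return false;            // definitely absent
    return true;                     // possibly present
}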
void MurmurHash3_128_wrapper(const void *key, uint32_t len, uint32_t seed,
                             void *out) {
#if defined(PLATFORM64)
    MurmurHash3_x64_128(key, len, seed, out);
#else
    MurmurHash3_x86_128(key, len, seed, out);
#endif
}
void ring_murmurhash3_x64_128(void *pPointer)
{
    char *key = NULL;
    int keylen;
    int seed = 0;
    uint64_t out[2];
    int ret_type = 0;
    List *tmp_list, *ret_val;

    if (RING_API_PARACOUNT < 2 || RING_API_PARACOUNT > 3) {
        RING_API_ERROR(RING_API_MISS2PARA);
        return;
    }
    if (!RING_API_ISSTRING(1)) {
        RING_API_ERROR("murmurhash3_x64_128 expects the first parameter to be a string");
        return;
    }
    if (!RING_API_ISNUMBER(2)) {
        RING_API_ERROR("murmurhash3_x64_128 expects the second parameter to be an integer");
        return;
    }

    key = RING_API_GETSTRING(1);
    keylen = strlen(key);
    seed = RING_API_GETNUMBER(2);

    if (RING_API_PARACOUNT == 3) {
        if (RING_API_ISNUMBER(3)) {
            ret_type = RING_API_GETNUMBER(3);
            if (!is_bool(ret_type)) {
                RING_API_ERROR("murmurhash3_x64_128 expects the third parameter to be a boolean value\n");
            }
        } else {
            RING_API_ERROR("murmurhash3_x64_128 expects the third parameter to be an integer\n");
        }
    }

    MurmurHash3_x64_128(key, keylen, seed, out);

    ret_val = RING_API_NEWLIST;
    tmp_list = ring_list_newlist_gc(((VM *)pPointer)->pRingState, ret_val);
    for (int i = 0; i < 2; i++) {
        if (ret_type) {
            char tmp[50];
            LONG2HEX(tmp, out[i]);
            ring_list_addstring2(tmp_list, (char *)tmp, strlen((char *)tmp));
        } else {
            ring_list_addint(tmp_list, out[i]);
        }
    }
    RING_API_RETLIST(ret_val);
}
template<>
MurmurHash<128>::result_type MurmurHash<128>::hash(const void* key, size_t n,
                                                   result_type seed) {
    result_type h;
#ifdef _ZKS64
    MurmurHash3_x64_128(key, (int)n, *((uint32_t*)&seed), (void*)&h);
#else
    MurmurHash3_x86_128(key, (int)n, *((uint32_t*)&seed), (void*)&h);
#endif
    return h;
}
uint64_t hashString2(char *s, int len)
{
    uint64_t hash_val[2];
    uint32_t seed = 0xAAAAAAAA;

#if UINTPTR_MAX == 0xffffffff
    /* 32-bit platform */
    MurmurHash3_x86_128((void *) s, len, seed, (void *) hash_val);
#else
    /* 64-bit platform */
    MurmurHash3_x64_128((void *) s, len, seed, (void *) hash_val);
#endif

    return hash_val[0];
}
static void hash_func(BLOOM *bloom, const char *key, uint32_t key_len,
                      uint32_t *hashes)
{
    int i;
    uint32_t checksum[4];

    MurmurHash3_x64_128(key, key_len, SALT_CONSTANT, checksum);
    uint32_t h1 = checksum[0];
    uint32_t h2 = checksum[1];

    for (i = 0; i < bloom->num_funcs; i++) {
        hashes[i] = (h1 + i * h2) % bloom->size;
    }
}
int interval_belongs(interval_t *interval, uint64_t key)
{
    struct _hkey_t hkey;

    MurmurHash3_x64_128((const void *) &key, (int) sizeof(uint64_t), 0,
                        (void *) &hkey);

    return interval_belongs_h(interval, &hkey);
}
const generator_t::digest_type& generator_t::digest() const {
    if (!digest_) {
        digest_ = digest_type();
        // Materialize the stream once and hash straight into the freshly
        // created digest, instead of re-entering digest() recursively.
        const std::string buffer = ss_.str();
        MurmurHash3_x64_128(
            reinterpret_cast<void*>(const_cast<char*>(buffer.c_str())),
            buffer.size(),
            0, // seed
            reinterpret_cast<void*>(&*digest_->begin()));
    }
    return digest_.get();
}
CMSData CountMinSketch::get(const void* key, int num_bytes) const
{
    // Return the min count in all vectors
    CMSData min = std::numeric_limits<uint8_t>::max();
    for (size_t i = 0; i < m_vectors.size(); ++i) {
        // Use unsigned values so the modulo below cannot go negative.
        uint64_t h[2];
        MurmurHash3_x64_128(key, num_bytes, m_hashes[i], h);
        size_t idx = h[0] % m_vectors[i].size();
        if (m_vectors[i][idx] < min)
            min = m_vectors[i][idx];
    }
    return min;
}
/*
 * UpdateSketchInPlace updates the sketch inside CmsTopn in place with the
 * given item and returns the new estimated frequency for that item.
 */
static Frequency
UpdateSketchInPlace(CmsTopn *cmsTopn, Datum newItem,
                    TypeCacheEntry *newItemTypeCacheEntry)
{
    uint32 hashIndex = 0;
    uint64 hashValueArray[2] = {0, 0};
    StringInfo newItemString = makeStringInfo();
    Frequency newFrequency = 0;
    Frequency minFrequency = MAX_FREQUENCY;

    /* get hashed values for the given item */
    ConvertDatumToBytes(newItem, newItemTypeCacheEntry, newItemString);
    MurmurHash3_x64_128(newItemString->data, newItemString->len, MURMUR_SEED,
                        hashValueArray);

    /*
     * Estimate frequency of the given item from hashed values and calculate
     * the new frequency for this item.
     */
    minFrequency = CmsTopnEstimateHashedItemFrequency(cmsTopn, hashValueArray);
    newFrequency = minFrequency + 1;

    /*
     * We can create an independent hash function for each index by using two
     * hash values from the MurmurHash function. This is a standard technique
     * from the hashing literature for the additional hash functions of the
     * form g(x) = h1(x) + i * h2(x) and does not hurt the independence
     * between hash functions. For more information you can check this paper:
     * http://www.eecs.harvard.edu/~kirsch/pubs/bbbf/esa06.pdf
     */
    for (hashIndex = 0; hashIndex < cmsTopn->sketchDepth; hashIndex++)
    {
        uint64 hashValue = hashValueArray[0] + (hashIndex * hashValueArray[1]);
        uint32 widthIndex = hashValue % cmsTopn->sketchWidth;
        uint32 depthOffset = hashIndex * cmsTopn->sketchWidth;
        uint32 counterIndex = depthOffset + widthIndex;

        /*
         * Selective update to decrease the effect of collisions. We only
         * update counters less than the new frequency because other counters
         * are bigger due to collisions.
         */
        Frequency counterFrequency = cmsTopn->sketch[counterIndex];
        if (newFrequency > counterFrequency)
        {
            cmsTopn->sketch[counterIndex] = newFrequency;
        }
    }

    return newFrequency;
}
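/*
 * Worked example of the index arithmetic above (numbers invented): with
 * sketchWidth = 100, sketchDepth = 4, h1 = 12, h2 = 7, the row hashIndex = 2
 * hashes to widthIndex = (12 + 2 * 7) % 100 = 26, so depthOffset = 200 and
 * counterIndex = 226 in the flattened sketch array.
 */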
/**
 * Hashit builds a hash of the key and returns the bucket to use and a
 * fingerprint value.
 * @param key 64-bit key to hash
 * @param func hash function to use
 * @param *b pointer to location of bucket return value
 * @param *fp pointer to location of fingerprint return value
 */
static void hashmap_hashit(struct pna_hashmap *map, void *key, int func,
                           uint32_t *b, uint32_t *fp)
{
    uint64_t out[2];

    /* main hashing routine */
#define C0 0xa96347c5
#define C1 0xe65ac2d3
    MurmurHash3_x64_128(key, map->key_size, func ? C0 : C1, out);
    //printf("hash: 0x%16llx:%16llx\n", out[0], out[1]);

    *b = out[0] & map->bkt_mask;
    *fp = out[1] & map->fp_mask;
}
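/*
 * The `& mask` trick above only partitions hashes uniformly when the table
 * sizes are powers of two. A small illustration (make_mask is a hypothetical
 * helper, not taken from pna_hashmap):
 */
#include <stdint.h>
#include <assert.h>

static uint32_t make_mask(uint32_t size) {
    assert(size && (size & (size - 1)) == 0);  /* must be a power of two */
    return size - 1;                           /* e.g. 1024 -> 0x3ff */
}
/* Then `hash & make_mask(1024)` is equivalent to `hash % 1024`. */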
/*
 * Perform the actual hashing for `key`
 *
 * Only call the hash once to get a pair of initial values (h1 and
 * h2). Use these values to generate all hashes in a quick loop.
 *
 * See paper by Kirsch, Mitzenmacher [2006]
 * http://www.eecs.harvard.edu/~michaelm/postscripts/rsa2008.pdf
 *
 * Slightly modified version from dablooms -- gvb
 */
inline static void hash_func(const char *data, size_t datalen,
                             uint32_t *hashes, int nfuncs, int counts_per_func)
{
    int i;
    uint32_t checksum[4], h1, h2;

    MurmurHash3_x64_128(data, datalen, SALT_CONSTANT, checksum);
    h1 = checksum[0];
    h2 = checksum[1];

    for (i = 0; i < nfuncs; i++) {
        hashes[i] = (h1 + i * h2) % counts_per_func;
    }
}
static int qp_avl_insert(struct avltree *t, avl_unit_val_t *v)
{
    /*
     * Insert with quadratic, then linear probing. A unique k is assured
     * for any k whenever size(t) < max(uint64_t).
     *
     * First try quadratic probing, with coeff. 2 (since m = 2^n.)
     * A unique k is not assured, since the codomain is not prime.
     * If this fails, fall back to linear probing from hk.k+1.
     *
     * On return, the stored key is in v->hk.k, the iteration
     * count in v->hk.p.
     **/
    struct avltree_node *tmpnode;
    uint64_t j, j2;
    uint32_t hk[4];

    assert(avltree_size(t) < UINT64_MAX);

    MurmurHash3_x64_128(v->name, strlen(v->name), 67, hk);
    memcpy(&v->hk.k, hk, 8);

    for (j = 0; j < UINT64_MAX; j++) {
        v->hk.k = (v->hk.k + (j * 2));
        tmpnode = avltree_insert(&v->node_hk, t);
        if (!tmpnode) {
            /* success, note iterations and return */
            v->hk.p = j;
            return 0;
        }
    }

    /* warn debug */
    memcpy(&v->hk.k, hk, 8);
    for (j2 = 1 /* tried j=0 */; j2 < UINT64_MAX; j2++) {
        v->hk.k = v->hk.k + j2;
        tmpnode = avltree_insert(&v->node_hk, t);
        if (!tmpnode) {
            /* success, note iterations and return */
            v->hk.p = j + j2;
            return 0;
        }
    }

    /* warn crit */
    return -1;
}
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
#include "MurmurHash3.h"

int main()
{
    uint8_t fox[] = "The quick brown fox jumps over the lazy dog.";
    uint8_t foxlen = 44;
    uint32_t fox_x86_32[1];
    uint64_t fox_x86_64[2];
    uint64_t fox_x64[2];

    MurmurHash3_x86_32(fox, foxlen, 0, fox_x86_32);
    MurmurHash3_x86_128(fox, foxlen, 0, fox_x86_64);
    MurmurHash3_x64_128(fox, foxlen, 0, fox_x64);

    //printf( "mm3 x86 32: %08X\n", fox_x86_32[0] );
    //printf( "mm3 x86 64: %016" PRIX64 " %016" PRIX64 "\n", fox_x86_64[0], fox_x86_64[1] );
    printf("%s, %016" PRIX64 "%016" PRIX64 "\n", (const char *)fox,
           fox_x64[0], fox_x64[1]);

    return 0;
}
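/*
 * Build sketch (the file names are assumptions -- MurmurHash3.cpp as shipped
 * with smhasher, compiled alongside this test program):
 *
 *   c++ -O2 main.c MurmurHash3.cpp -o mm3demo && ./mm3demo
 */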
void CountMinSketch::increment(const void* key, int num_bytes)
{
    assert(!m_vectors.empty());

    // Increment all vectors
    for (size_t i = 0; i < m_vectors.size(); ++i) {
        // Use unsigned values so the modulo below cannot go negative.
        uint64_t h[2];
        MurmurHash3_x64_128(key, num_bytes, m_hashes[i], h);
        size_t idx = h[0] % m_vectors[i].size();

        // Perform an atomic compare and swap to increment the value.
        // If the value has reached saturation, do not update.
        while (1) {
            CMSData v = m_vectors[i][idx];
            if (v == m_max_count ||
                __sync_bool_compare_and_swap(&m_vectors[i][idx], v, v + 1))
                break;
        }
    }
}
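/*
 * A hypothetical round trip, assuming a constructor taking (width, depth) --
 * the real class's construction API is not shown in these snippets:
 */
#include <cstdio>

void count_min_demo()
{
    CountMinSketch sketch(/*width*/ 1024, /*depth*/ 4);
    const char key[] = "example";

    for (int i = 0; i < 3; ++i)
        sketch.increment(key, sizeof(key));

    // get() returns an upper-bounded estimate: at least 3, possibly more
    // if other keys collided in every row.
    std::printf("estimate: %u\n", (unsigned)sketch.get(key, sizeof(key)));
}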
static PyObject *
_py_murmur3_128(PyObject *self, PyObject *args, int x86, int size)
{
    const char *key;
    Py_ssize_t len;
    uint32_t seed = 0;
    unsigned char out[16];

    if (!PyArg_ParseTuple(args, "s#|I", &key, &len, &seed)) {
        return NULL;
    }

    if (x86) {
        MurmurHash3_x86_128((void *)key, len, seed, out);
    } else {
        MurmurHash3_x64_128((void *)key, len, seed, out);
    }

    return _PyLong_FromByteArray((const unsigned char *)out, size, 0, 0);
}
// Computes our hashes
void bf_compute_hashes(uint32_t k_num, char *key, uint64_t *hashes)
{
    /**
     * We use the results of
     * 'Less Hashing, Same Performance: Building a Better Bloom Filter'
     * http://www.eecs.harvard.edu/~kirsch/pubs/bbbf/esa06.pdf, to use
     * g_i(x) = h1(u) + i * h2(u) mod m'
     *
     * This allows us to only use 2 hash functions h1, and h2 but generate
     * k unique hashes using linear combinations. This is a vast speedup
     * over our previous technique of 4 hashes, that used double hashing.
     */

    // Get the length of the key
    uint64_t len = strlen(key);

    // Compute the first hash
    uint64_t out[2];
    MurmurHash3_x64_128(key, len, 0, out);

    // Copy these out
    hashes[0] = out[0];  // Upper 64bits of murmur
    hashes[1] = out[1];  // Lower 64bits of murmur

    // Compute the second hash
    uint64_t *hash1 = (uint64_t *)&out;
    uint64_t *hash2 = hash1 + 1;
    SpookyHash128(key, len, 0, 0, hash1, hash2);

    // Copy these out
    hashes[2] = out[0];  // Use the upper 64bits of Spooky
    hashes[3] = out[1];  // Use the lower 64bits of Spooky

    // Compute an arbitrary k_num using a linear combination.
    // Add a mod by the largest 64-bit prime (2^64 - 59). This shrinks the
    // addressable space by only 59 values but should make the hashes
    // a bit better.
    for (uint32_t i = 4; i < k_num; i++) {
        hashes[i] = hashes[1] + ((i * hashes[3]) % 18446744073709551557U);
    }
}
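/*
 * Downstream, each of the k hashes is typically reduced to a bit offset in
 * a filter of m bits. A minimal sketch; `filter` and SET_BIT are
 * hypothetical, not part of the library shown above:
 */
#include <stdint.h>

#define SET_BIT(buf, idx) ((buf)[(idx) >> 3] |= (uint8_t)(1u << ((idx) & 7)))

static void bf_demo_add(uint8_t *filter, uint64_t m_bits,
                        uint64_t *hashes, uint32_t k_num) {
    for (uint32_t i = 0; i < k_num; i++) {
        SET_BIT(filter, hashes[i] % m_bits);
    }
}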