Exemplo n.º 1
0
/*
 * Derive bloom->nfuncs hash values for `key` from a single 128-bit
 * MurmurHash3 pass, using the Kirsch-Mitzenmacher construction
 * g_i(x) = h1(x) + i * h2(x) mod counts_per_func.
 */
void dablooms_hash_func(counting_bloom_t *bloom, const char *key, size_t key_len, uint32_t *hashes)
{
    int i;
    uint32_t checksum[4];

    /* A single hash call is sufficient; the original invoked the hash
     * twice in a row with identical arguments, wasting a full pass. */
    MurmurHash3_x64_128(key, key_len, SALT_CONSTANT, checksum);
    uint32_t h1 = checksum[0];
    uint32_t h2 = checksum[1];

    for (i = 0; i < bloom->nfuncs; i++) {
        hashes[i] = (h1 + i * h2) % bloom->counts_per_func;
    }
}
Exemplo n.º 2
0
/**
 * Adds a new key to the set.
 * @arg s The set to add to
 * @arg key The key to add
 */
void set_add(set_t *s, char *key) {
    uint64_t hashed[2];
    uint32_t idx;

    MurmurHash3_x64_128(key, strlen(key), 0, &hashed);

    switch (s->type) {
        case EXACT:
            /* Ignore keys that are already present. */
            for (idx = 0; idx < s->store.s.count; idx++) {
                if (s->store.s.hashes[idx] == hashed[1])
                    return;
            }

            /* Store exactly while the fixed array still has room. */
            if (idx < SET_MAX_EXACT) {
                s->store.s.hashes[idx] = hashed[1];
                s->store.s.count++;
                return;
            }

            /* Out of room: convert to an approximate (HLL) set and
             * deliberately fall through so this key lands in the HLL. */
            convert_exact_to_approx(s);
            /* fallthrough */

        case APPROX:
            hll_add_hash(&s->store.h, hashed[1]);
            break;
    }
}
Exemplo n.º 3
0
    // 128-bit specialization: produce a salt for MurmurHash<128>.
    // With fixed == true a constant compile-time salt is returned;
    // otherwise the salt is derived from the current time string and
    // the machine's MAC addresses, so it varies per host and per run.
    template<> MurmurHash<128>::result_type MurmurHash<128>::salt(bool fixed)
    {
        if (fixed) {
            // Deterministic salt: four hard-coded 32-bit words written
            // directly into the 128-bit result.
            result_type ret;
            uint32_t* p = (uint32_t*)&ret;
            (*p++) = 0x58132134;
            (*p++) = 0x94827513;
            (*p++) = 0x16574893;
            (*p)   = 0x17932864;
            return ret;
        }

        // Seed the first 32-bit word from the textual current time.
        result_type h;
        uint32_t* p0 = (uint32_t*)&h;
        std::time_t now = std::time(nullptr);
        char* nowstr = std::ctime(&now);
        MurmurHash3_x86_32((void*)nowstr, (int)std::strlen(nowstr), 0, (void*)p0);

        // Fold each MAC address in.  NOTE(review): each call overwrites
        // all of h; chaining between iterations happens only through the
        // first word (*p0), which becomes the next call's seed.
        std::vector<zks::u8string> mac_addrs = get_mac_address();
        for (size_t i = 0; i < mac_addrs.size(); ++i) {
#ifdef _ZKS64
            MurmurHash3_x64_128((void*)mac_addrs[i].data(), (int)mac_addrs[i].size(), *p0, (void*)&h);
#else
            MurmurHash3_x86_128((void*)mac_addrs[i].data(), (int)mac_addrs[i].size(), *p0, (void*)&h);
#endif
        }
        return h;
    }
Exemplo n.º 4
0
/*
 * Look up `v` by name, replaying the quadratic probe sequence used at
 * insert time for up to `maxj` iterations.  Returns the stored node's
 * value on a name match, NULL when not found.
 */
static avl_unit_val_t *qp_avl_lookup_s(struct avltree *t, avl_unit_val_t *v,
				       int maxj)
{
	struct avltree_node *node;
	avl_unit_val_t *v2;
	uint32_t j;		/* removed unused `j2` from the original */
	uint32_t hk[4];

	assert(avltree_size(t) < UINT64_MAX);

	/* Seed the probe key from the low 64 bits of the name's hash. */
	MurmurHash3_x64_128(v->name, strlen(v->name), 67, hk);
	memcpy(&v->hk.k, hk, 8);

	/* Explicit cast documents the unsigned comparison the original
	 * performed implicitly via integer conversion. */
	for (j = 0; j < (uint32_t)maxj; j++) {
		v->hk.k = (v->hk.k + (j * 2));
		node = avltree_inline_lookup(&v->node_hk, t);
		if (node) {
			/* it's almost but not entirely certain that node is
			 * related to v.  in the general case, j is also not
			 * constrained to be v->hk.p */
			v2 = avltree_container_of(node, avl_unit_val_t,
						  node_hk);
			if (!strcmp(v->name, v2->name))
				return v2;
		}
	}

	/* warn crit  */
	return NULL;
}
Exemplo n.º 5
0
/*
 * CmsTopnEstimateItemFrequency calculates estimated frequency for the given
 * item and returns it.
 *
 * The item is serialized to a byte string (detoasting varlena values
 * first when needed), hashed once with 128-bit Murmur3, and the pair of
 * 64-bit hash words is passed to the hashed-frequency lookup.
 */
static Frequency
CmsTopnEstimateItemFrequency(CmsTopn *cmsTopn, Datum item,
                             TypeCacheEntry *itemTypeCacheEntry)
{
	uint64 hashValueArray[2] = {0, 0};
	StringInfo itemString = makeStringInfo();
	Frequency frequency = 0;

	/* if datum is toasted, detoast it (typlen == -1 marks varlena types) */
	if (itemTypeCacheEntry->typlen == -1)
	{
		Datum detoastedItem =  PointerGetDatum(PG_DETOAST_DATUM(item));
		ConvertDatumToBytes(detoastedItem, itemTypeCacheEntry, itemString);
	}
	else
	{
		ConvertDatumToBytes(item, itemTypeCacheEntry, itemString);
	}

	/*
	 * Calculate hash values for the given item and then get frequency estimate
	 * with these hashed values.
	 */
	MurmurHash3_x64_128(itemString->data, itemString->len, MURMUR_SEED, &hashValueArray);
	frequency = CmsTopnEstimateHashedItemFrequency(cmsTopn, hashValueArray);

	return frequency;
}
Exemplo n.º 6
0
 /* 64-bit specialization: run the 128-bit x64 Murmur3 variant and keep
  * the first 64-bit word.  Only the low 32 bits of `seed` are used,
  * since the underlying function takes a 32-bit seed. */
 template<> MurmurHash<64>::result_type MurmurHash<64>::hash(const void* key, size_t n, result_type seed)
 {
     uint64_t out[2];
     uint32_t* seed32 = (uint32_t*)&seed;
     MurmurHash3_x64_128(key, (int)n, *seed32, (void*)out);
     return out[0];
 }
Exemplo n.º 7
0
static PyObject *
mmh3_hash128(PyObject *self, PyObject *args, PyObject *keywds)
{
    const char *key;
    int key_len;
    uint32_t seed = 0;
    uint64_t hash[2];
    char x64arch = 1;

    static char *kwlist[] = {(char *)"key", (char *)"seed",
      (char *)"x64arch", NULL};

    /* Required byte string, optional seed and architecture selector. */
    if (!PyArg_ParseTupleAndKeywords(args, keywds, "s#|iB", kwlist,
        &key, &key_len, &seed, &x64arch)) {
        return NULL;
    }

    /* Dispatch to the Murmur3 variant the caller asked for. */
    if (x64arch == 1) {
        MurmurHash3_x64_128(key, key_len, seed, hash);
    } else {
        MurmurHash3_x86_128(key, key_len, seed, hash);
    }

    /**
     * _PyLong_FromByteArray is not a part of official Python/C API
     * and can be displaced (although it is practically stable). cf.
     * https://mail.python.org/pipermail/python-list/2006-August/372368.html
     */
    return _PyLong_FromByteArray((unsigned char *)hash, 16, 1, 0);
}
Exemplo n.º 8
0
/*
 * mmh3.hash64(key, seed=0, x64arch=True) -> (int64, int64)
 * Returns the two signed 64-bit halves of the 128-bit Murmur3 hash.
 */
static PyObject *
mmh3_hash64(PyObject *self, PyObject *args, PyObject *keywds)
{
    const char *target_str;
    int target_str_len;
    uint32_t seed = 0;
    int64_t result[2];
    char x64arch = 1;

    static char *kwlist[] = {(char *)"key", (char *)"seed",
      (char *)"x64arch", NULL};

    if (!PyArg_ParseTupleAndKeywords(args, keywds, "s#|iB", kwlist,
        &target_str, &target_str_len, &seed, &x64arch)) {
        return NULL;
    }

    if (x64arch == 1) {
      MurmurHash3_x64_128(target_str, target_str_len, seed, result);
    } else {
      MurmurHash3_x86_128(target_str, target_str_len, seed, result);
    }

    /* "LL" (C long long) matches int64_t on every platform; the previous
     * "ll" format silently truncated the values on LLP64 systems such as
     * 64-bit Windows, where long is only 32 bits. */
    PyObject *retval = Py_BuildValue("LL", (long long)result[0],
                                     (long long)result[1]);
    return retval;
}
Exemplo n.º 9
0
/*
 * mmh3.hash_bytes(key, seed=0, x64arch=True) -> bytes
 * Returns the raw 16-byte 128-bit Murmur3 digest.
 */
static PyObject *
mmh3_hash_bytes(PyObject *self, PyObject *args, PyObject *keywds)
{
    const char *target_str;
    int target_str_len;
    uint32_t seed = 0;
    uint32_t result[4];
    char x64arch = 1;

    static char *kwlist[] = {(char *)"key", (char *)"seed",
      (char *)"x64arch", NULL};

    if (!PyArg_ParseTupleAndKeywords(args, keywds, "s#|iB", kwlist,
        &target_str, &target_str_len, &seed, &x64arch)) {
        return NULL;
    }

    if (x64arch == 1) {
      MurmurHash3_x64_128(target_str, target_str_len, seed, result);
    } else {
      MurmurHash3_x86_128(target_str, target_str_len, seed, result);
    }

    /* `result` already holds the 16 digest bytes; the intermediate
     * char buffer and memcpy in the original were redundant. */
    return PyBytes_FromStringAndSize((const char *)result, 16);
}
Exemplo n.º 10
0
/*
 * Hash a k-mer: short sequences (<= 16 bytes) get a 32-bit hash, longer
 * ones a 64-bit hash.  On 32-bit builds the 64-bit value is assembled
 * from two 32-bit Murmur3 passes; otherwise one x64 128-bit pass is
 * used and its first 8 bytes are kept.
 */
hash_u getHash(const char * seq, int length)
{
    bool use64 = length > 16;

    // MurmurHash3_x64_128 always writes 16 bytes of output.  The
    // original buffer was `char data[use64 ? 8 : 4]`, which that call
    // overflowed on every 64-bit build — a stack buffer overflow.
    char data[16];

#ifdef ARCH_32
    MurmurHash3_x86_32(seq, length > 16 ? 16 : length, seed, data);
    if ( use64 )
    {
        MurmurHash3_x86_32(seq + 16, length - 16, seed, data + 4);
    }
#else
    MurmurHash3_x64_128(seq, length, seed, data);
#endif

    hash_u hash;

    if ( use64 )
    {
        hash.hash64 = *((hash64_t *)data);
    }
    else
    {
        hash.hash32 = *((hash32_t *)data);
    }

    return hash;
}
Exemplo n.º 11
0
/*
 * Mix another 128-bit hash into this one: serialize both hash pairs
 * into a string and rehash that string back into _hash.
 */
void
Hash::append(const Hash& other)
{
    std::ostringstream stream;
    stream << _hash[0] << "_" << _hash[1] << other._hash[0] << "_" << other._hash[1];

    // Materialize the stream contents once.  The original called
    // stream.str() three times, constructing three separate temporary
    // strings just to read c_str() and length().
    const std::string combined = stream.str();
    MurmurHash3_x64_128(combined.c_str(), (int32_t)combined.length(), 0, &_hash[0]);
}
Exemplo n.º 12
0
    // 256-bit specialization: build the digest from two 128-bit Murmur3
    // passes over the same input, each seeded with a different 32-bit
    // word taken from `seed`.
    template<> MurmurHash<256>::result_type MurmurHash<256>::hash(const void* key, size_t n, result_type seed)
    {
        result_type h;
        uint32_t* ph = (uint32_t*)&h;
        uint32_t* ps = (uint32_t*)&seed;
#ifdef _ZKS64
        // Lower 128 bits, seeded with seed word 0.
        MurmurHash3_x64_128(key, (int)n, *(ps), (void*)ph);
        // Advance to seed word 1 and to the upper half (4 x uint32_t = 16 bytes).
        ++ps; ph += 4;
        MurmurHash3_x64_128(key, (int)n, *(ps), (void*)ph);
#else
        MurmurHash3_x86_128(key, (int)n, *(ps), (void*)ph);
        ++ps; ph += 4;
        MurmurHash3_x86_128(key, (int)n, *(ps), (void*)ph);
#endif
        return h;
    }
Exemplo n.º 13
0
/**
 * Adds a new key to the HLL.
 * @arg h The hll to add to
 * @arg key The key to add
 */
void hll_add(hll_t *h, char *key) {
    uint64_t hashed[2];

    // Hash the key with 128-bit Murmur3, then feed the second 64-bit
    // word into the register-update path.
    MurmurHash3_x64_128(key, strlen(key), 0, &hashed);
    hll_add_hash(h, hashed[1]);
}
Exemplo n.º 14
0
// Ref: Adam Kirsch and Michael Mitzenmacher
//      Less Hashing, Same Performance: Building a Better Bloom Filter
void bloom_filter_t::generate_indexes(const void *key, size_t len) const
{
	uint64_t h[2];

	// One 128-bit Murmur3 pass yields the (h1, h2) pair; every filter
	// index is then the linear combination g_i = h1 + i*h2 mod m.
	MurmurHash3_x64_128(key, len, seed_, h);
	for (size_t i = 0; i < num_hashes_; i++) {
		indexes_[i] = (h[0] + i * h[1]) % num_buckets_;
	}
}
Exemplo n.º 15
0
/*
 * Dispatch to the 128-bit Murmur3 variant matching the build target:
 * the x64-optimized implementation when PLATFORM64 is defined, the
 * x86 implementation otherwise.  `out` receives 16 bytes.
 */
void MurmurHash3_128_wrapper(const void *key, uint32_t len, uint32_t seed, void *out)
{
#if defined(PLATFORM64)
    MurmurHash3_x64_128(key, len, seed, out);
#else
    MurmurHash3_x86_128(key, len, seed, out);
#endif
}
Exemplo n.º 16
0
/*
 * ring_murmurhash3_x64_128(key, seed [, as_hex])
 *
 * Ring VM binding: hashes the string parameter with 128-bit x64 Murmur3
 * and returns a two-element list holding the two 64-bit halves, either
 * as integers or (when the optional third parameter is true) as hex
 * strings.
 */
void ring_murmurhash3_x64_128(void *pPointer)
{
    char *key = NULL;
    int keylen;
    int seed = 0;
    uint64_t out[2];
    int ret_type = 0;

    List *tmp_list, *ret_val;

    if (RING_API_PARACOUNT < 2 || RING_API_PARACOUNT > 3) {
        RING_API_ERROR(RING_API_MISS2PARA);
        return ;
    }

    if (!RING_API_ISSTRING(1)) {
        RING_API_ERROR("murmurhash3_x64_128 expects the first parameter to be a string");
        return;
    }

    if (!RING_API_ISNUMBER(2)) {
        /* Fixed copy/paste bug: this check concerns the SECOND parameter,
         * but the original message said "first". */
        RING_API_ERROR("murmurhash3_x64_128 expects the second parameter to be an integer");
        return;
    }

    key = RING_API_GETSTRING(1);
    keylen = strlen(key);
    seed = RING_API_GETNUMBER(2);

    if (RING_API_PARACOUNT == 3) {
        if (RING_API_ISNUMBER(3)) {
            ret_type = RING_API_GETNUMBER(3);
            if (!is_bool(ret_type)) {
                RING_API_ERROR("Third parameter should be boolean value\n");
                /* return added for consistency with the other error
                 * paths; the original fell through and hashed anyway. */
                return;
            }
        } else {
            RING_API_ERROR("murmurhash3_x64_128 expects the third parameter to be an integer\n");
            return;
        }
    }

    MurmurHash3_x64_128(key, keylen, seed, out);

    ret_val = RING_API_NEWLIST;
    tmp_list = ring_list_newlist_gc(((VM *)pPointer)->pRingState, ret_val);

    /* Emit both 64-bit halves in the requested representation. */
    for (int i = 0; i < 2; i++) {
        if (ret_type) {
            char tmp[50];
            LONG2HEX(tmp, out[i]);
            ring_list_addstring2(tmp_list, (char*) tmp, strlen((char *) tmp));
        } else {
            ring_list_addint(tmp_list, out[i]);
        }
    }

    RING_API_RETLIST(ret_val);
}
Exemplo n.º 17
0
    // 128-bit specialization: one pass of the platform-appropriate
    // 128-bit Murmur3 fills the whole result.  Only the low 32 bits of
    // `seed` are forwarded, since the underlying API takes a uint32_t.
    template<> MurmurHash<128>::result_type MurmurHash<128>::hash(const void* key, size_t n, result_type seed)
    {
        result_type h;
#ifdef _ZKS64
        MurmurHash3_x64_128(key, (int)n, *((uint32_t*)&seed), (void*)&h);
#else
        MurmurHash3_x86_128(key, (int)n, *((uint32_t*)&seed), (void*)&h);
#endif
        return h;
    }
Exemplo n.º 18
0
/* Hash `len` bytes of `s` down to 64 bits: run the 128-bit Murmur3
 * variant appropriate for the pointer width and keep the first word. */
uint64_t hashString2(char *s, int len) {
    uint64_t out[2];
    uint32_t seed = 0xAAAAAAAA;

#if UINTPTR_MAX == 0xffffffff
    /* 32-bit build */
    MurmurHash3_x86_128((void *) s, len, seed, (void *) out);
#else
    /* 64-bit build */
    MurmurHash3_x64_128((void *) s, len, seed, (void *) out);
#endif

    return out[0];
}
Exemplo n.º 19
0
/* Fill hashes[0..num_funcs) for `key` via the Kirsch-Mitzenmacher
 * double-hashing scheme over one 128-bit Murmur3 result. */
static void hash_func(BLOOM *bloom, const char *key, uint32_t key_len, uint32_t *hashes)
{
	int i;
	uint32_t digest[4];

	MurmurHash3_x64_128(key, key_len, SALT_CONSTANT, digest);
	uint32_t base = digest[0];
	uint32_t step = digest[1];

	for (i = 0; i < bloom->num_funcs; i++) {
		hashes[i] = (base + i * step) % bloom->size;
	}
}
Exemplo n.º 20
0
/* Membership test: hash the raw 8 bytes of `key` into a 128-bit helper
 * key and delegate to the hashed-key variant. */
int
interval_belongs (interval_t * interval, uint64_t key)
{
    struct _hkey_t hkey;

    MurmurHash3_x64_128 ((const void *) &key, (int) sizeof (uint64_t), 0,
                         (void *) &hkey);

    return interval_belongs_h (interval, &hkey);
}
Exemplo n.º 21
0
/*
 * Lazily compute and cache the digest of the accumulated stream
 * contents.  Subsequent calls return the cached value.
 */
const generator_t::digest_type& generator_t::digest() const
{
    if( !digest_)
    {
        digest_ = digest_type();

        // Materialize the stream contents once (the original built two
        // separate temporaries for c_str() and size()), and write the
        // hash through digest_ directly instead of re-entering digest()
        // recursively as the original did with digest().begin().
        const std::string contents = ss_.str();
        MurmurHash3_x64_128( reinterpret_cast<void*>( const_cast<char*>( contents.c_str())),
                             contents.size(),
                             0, // seed
                             reinterpret_cast<void*>( &*digest_->begin()));
    }

    return digest_.get();
}
Exemplo n.º 22
0
/*
 * Return the count-min estimate for `key`: the minimum counter value
 * across all hash rows.
 */
CMSData CountMinSketch::get(const void* key, int num_bytes) const
{
    // Return the min count in all vectors
    CMSData min = std::numeric_limits<uint8_t>::max();
    for(size_t i = 0; i < m_vectors.size(); ++i) {
        // Unsigned hash words: with the original int64_t, a negative
        // h[0] reduced modulo a 32-bit size_t could produce a negative
        // (out-of-range) index.  Identical results on 64-bit targets.
        uint64_t h[2];
        MurmurHash3_x64_128(key, num_bytes, m_hashes[i], h);
        size_t idx = h[0] % m_vectors[i].size();
        if(m_vectors[i][idx] < min)
            min = m_vectors[i][idx];
    }
    return min;
}
Exemplo n.º 23
0
/*
 * UpdateSketchInPlace updates sketch inside CmsTopn in-place with given item
 * and returns new estimated frequency for the given item.
 *
 * The item is serialized to bytes, hashed once with 128-bit Murmur3,
 * and the two 64-bit hash words drive every row of the sketch.
 */
static Frequency
UpdateSketchInPlace(CmsTopn *cmsTopn, Datum newItem,
                    TypeCacheEntry *newItemTypeCacheEntry)
{
	uint32 hashIndex = 0;
	uint64 hashValueArray[2] = {0, 0};
	StringInfo newItemString = makeStringInfo();
	Frequency newFrequency = 0;
	Frequency minFrequency = MAX_FREQUENCY;

	/* get hashed values for the given item */
	ConvertDatumToBytes(newItem, newItemTypeCacheEntry, newItemString);
	MurmurHash3_x64_128(newItemString->data, newItemString->len, MURMUR_SEED,
	                    &hashValueArray);

	/*
	 * Estimate frequency of the given item from hashed values and calculate new
	 * frequency for this item.
	 */
	minFrequency = CmsTopnEstimateHashedItemFrequency(cmsTopn, hashValueArray);
	newFrequency = minFrequency + 1;

	/*
	 * We can create an independent hash function for each index by using two hash
	 * values from the Murmur Hash function. This is a standard technique from the
	 * hashing literature for the additional hash functions of the form
	 * g(x) = h1(x) + i * h2(x) and does not hurt the independence between hash
	 * function. For more information you can check this paper:
	 * http://www.eecs.harvard.edu/~kirsch/pubs/bbbf/esa06.pdf
	 */
	for (hashIndex = 0; hashIndex < cmsTopn->sketchDepth; hashIndex++)
	{
		/* counterIndex addresses row `hashIndex`, column `widthIndex`
		 * of the flattened depth x width sketch array */
		uint64 hashValue = hashValueArray[0] + (hashIndex * hashValueArray[1]);
		uint32 widthIndex = hashValue % cmsTopn->sketchWidth;
		uint32 depthOffset = hashIndex * cmsTopn->sketchWidth;
		uint32 counterIndex = depthOffset + widthIndex;

		/*
		 * Selective update to decrease effect of collisions. We only update
		 * counters less than new frequency because other counters are bigger
		 * due to collisions.
		 */
		Frequency counterFrequency = cmsTopn->sketch[counterIndex];
		if (newFrequency > counterFrequency)
		{
			cmsTopn->sketch[counterIndex] = newFrequency;
		}
	}

	return newFrequency;
}
Exemplo n.º 24
0
/**
 * Hashit builds a hash of the key and returns the bucket to use and a
 * fingerprint value.
 * @param map hashmap supplying key size and bucket/fingerprint masks
 * @param key 64-bit key to hash
 * @param func hash function to use
 * @param *b pointer to location of bucket return value
 * @param *fp pointer to location of fingerprint return value
 */
static void hashmap_hashit(struct pna_hashmap *map, void *key, int func, uint32_t *b, uint32_t *fp)
{
    uint64_t hash[2];

    /* main hashing routine */
#define C0 0xa96347c5
#define C1 0xe65ac2d3
    /* NOTE(review): func != 0 selects seed C0, func == 0 selects C1. */
    MurmurHash3_x64_128(key, map->key_size, func ? C0 : C1, hash);

    /* Bucket from the first hash word, fingerprint from the second. */
    *b = hash[0] & map->bkt_mask;
    *fp = hash[1] & map->fp_mask;
}
Exemplo n.º 25
0
/*
 * Perform the actual hashing for `key`
 *
 * Only call the hash once to get a pair of initial values (h1 and
 * h2). Use these values to generate all hashes in a quick loop.
 *
 * See paper by Kirsch, Mitzenmacher [2006]
 * http://www.eecs.harvard.edu/~michaelm/postscripts/rsa2008.pdf
 *
 * Slightly modified version from dablooms -- gvb
 */
inline static void
hash_func(const char * data, size_t datalen, uint32_t * hashes,
	int nfuncs, int counts_per_func)
{
	uint32_t digest[4];

	/* One 128-bit Murmur3 pass; every further hash is the linear
	 * combination h1 + i*h2 reduced modulo the table width. */
	MurmurHash3_x64_128(data, datalen, SALT_CONSTANT, digest);
	for (int i = 0; i < nfuncs; i++)
		hashes[i] = (digest[0] + i * digest[1]) % counts_per_func;
}
Exemplo n.º 26
0
static int qp_avl_insert(struct avltree *t, avl_unit_val_t *v)
{
	/*
	 * Insert with quadatic, linear probing.  A unique k is assured for
	 * any k whenever size(t) < max(uint64_t).
	 *
	 * First try quadratic probing, with coeff. 2 (since m = 2^n.)
	 * A unique k is not assured, since the codomain is not prime.
	 * If this fails, fall back to linear probing from hk.k+1.
	 *
	 * On return, the stored key is in v->hk.k, the iteration
	 * count in v->hk.p.
	 **/
	struct avltree_node *tmpnode;
	uint64_t j, j2;
	uint32_t hk[4];

	assert(avltree_size(t) < UINT64_MAX);

	/* Seed the probe key from the low 64 bits of the name's hash. */
	MurmurHash3_x64_128(v->name, strlen(v->name), 67, hk);
	memcpy(&v->hk.k, hk, 8);

	/* Counters are uint64_t so the `< UINT64_MAX` bound can actually
	 * be reached; with the original uint32_t counters the comparison
	 * was always true after integer promotion, so the loops could
	 * never terminate via their bound. */
	for (j = 0; j < UINT64_MAX; j++) {
		v->hk.k = (v->hk.k + (j * 2));
		tmpnode = avltree_insert(&v->node_hk, t);
		if (!tmpnode) {
			/* success, note iterations and return */
			v->hk.p = j;
			return 0;
		}
	}

	/* warn debug */

	memcpy(&v->hk.k, hk, 8);
	for (j2 = 1 /* tried j=0 */; j2 < UINT64_MAX; j2++) {
		v->hk.k = v->hk.k + j2;
		tmpnode = avltree_insert(&v->node_hk, t);
		if (!tmpnode) {
			/* success, note iterations and return */
			v->hk.p = j + j2;
			return 0;
		}
		/* NOTE(review): the original had a second `j2++` here,
		 * which skipped every other linear probe offset; removed. */
	}

	/* warn crit  */
	return -1;
}
Exemplo n.º 27
0
/* Smoke test: print the x64 128-bit Murmur3 digest of a fixed phrase. */
int main () {
   uint8_t fox[] = "The quick brown fox jumps over the lazy dog.";
   uint8_t foxlen = 44;   /* length of the sentence, excluding the NUL */

   uint32_t fox_x86_32[1];
   uint64_t fox_x86_64[2];
   uint64_t fox_x64[2];

   MurmurHash3_x86_32 (fox, foxlen, 0, fox_x86_32);
   MurmurHash3_x86_128 (fox, foxlen, 0, fox_x86_64);
   MurmurHash3_x64_128 (fox, foxlen, 0, fox_x64);

   //printf( "mm3 x86 32: %08X\n",    fox_x86_32[0] );
   //printf( "mm3 x86 64: %016llX %016llX\n", fox_x86_64[0], fox_x86_64[1] );
   /* %llX with an explicit cast: the original %lX is wrong for uint64_t
    * on platforms where long is 32 bits (ILP32, LLP64/Windows). */
   printf( "%s, %016llX%016llX\n", fox,
           (unsigned long long)fox_x64[0], (unsigned long long)fox_x64[1] );

   return 0;
}
Exemplo n.º 28
0
/*
 * Increment the counter for `key` in every hash row, saturating at
 * m_max_count.  Updates are lock-free via compare-and-swap.
 */
void CountMinSketch::increment(const void* key, int num_bytes)
{
    assert(!m_vectors.empty());
    // Increment all vectors
    for(size_t i = 0; i < m_vectors.size(); ++i) {
        // Unsigned hash words: with the original int64_t, a negative
        // h[0] reduced modulo a 32-bit size_t could produce a negative
        // (out-of-range) index.  Identical results on 64-bit targets.
        uint64_t h[2];
        MurmurHash3_x64_128(key, num_bytes, m_hashes[i], h);
        size_t idx = h[0] % m_vectors[i].size();

        // Perform an atomic compare and swap to increment the value
        // If the value has reached saturation, do not update
        while(1) {
            CMSData v = m_vectors[i][idx];
            if(v == m_max_count || __sync_bool_compare_and_swap(&m_vectors[i][idx], v, v + 1))
                break;
        }
    }
}
Exemplo n.º 29
0
/* Shared backend for the 128-bit Murmur3 Python wrappers: hash the byte
 * string with the x86 or x64 variant and return the first `size` bytes
 * as a non-negative Python long (big-endian, unsigned). */
static PyObject *
_py_murmur3_128(PyObject *self, PyObject *args, int x86, int size)
{
    const char *data;
    Py_ssize_t data_len;
    uint32_t seed = 0;
    unsigned char digest[16];

    if (!PyArg_ParseTuple(args, "s#|I", &data, &data_len, &seed)) {
        return NULL;
    }

    if (x86) {
        MurmurHash3_x86_128((void *)data, data_len, seed, digest);
    } else {
        MurmurHash3_x64_128((void *)data, data_len, seed, digest);
    }

    return _PyLong_FromByteArray(digest, size, 0, 0);
}
Exemplo n.º 30
0
// Computes our hashes
void bf_compute_hashes(uint32_t k_num, char *key, uint64_t *hashes) {
    /**
     * We use the results of
     * 'Less Hashing, Same Performance: Building a Better Bloom Filter'
     * http://www.eecs.harvard.edu/~kirsch/pubs/bbbf/esa06.pdf, to use
     * g_i(x) = h1(u) + i * h2(u) mod m'
     *
     * This allows us to only use 2 hash functions h1, and h2 but generate
     * k unique hashes using linear combinations. This is a vast speedup
     * over our previous technique of 4 hashes, that used double hashing.
     *
     */

    // Hash the key once per underlying function.
    uint64_t len = strlen(key);
    uint64_t pair[2];

    // First pair of base hashes: 128-bit Murmur3.
    MurmurHash3_x64_128(key, len, 0, &pair);
    hashes[0] = pair[0];  // Upper 64bits of murmur
    hashes[1] = pair[1];  // Lower 64bits of murmur

    // Second pair: 128-bit SpookyHash, written over the same buffer.
    uint64_t *first = (uint64_t*)&pair;
    uint64_t *second = first + 1;
    SpookyHash128(key, len, 0, 0, first, second);
    hashes[2] = pair[0];  // Use the upper 64bits of Spooky
    hashes[3] = pair[1];  // Use the lower 64bits of Spooky

    // Compute an arbitrary k_num using a linear combination
    // Add a mod by the largest 64bit prime. This only reduces the
    // number of addressable bits by 54 but should make the hashes
    // a bit better.
    for (uint32_t i = 4; i < k_num; i++) {
        hashes[i] = hashes[1] + ((i * hashes[3]) % 18446744073709551557U);
    }
}