/*
 * Hash "size" bytes starting at "buf" by forwarding to hash64() with a
 * fixed third argument.
 */
udmhash64_t UdmHash64(const char * buf, size_t size)
{
  /*
   * Both branches pass the same constant; only the spelling of the literal
   * differs (the WIN32 build omits the LL suffix).
   */
#ifdef WIN32
  return hash64(buf, size, 0xb7e151628aed2a6b);
#else
  return hash64(buf, size, 0xb7e151628aed2a6bLL);
#endif
}
void TestCppTools::hash_fnv_test() { // TODO: fill testDataVec with http://www.isthe.com/chongo/src/fnv/test_fnv.c const hash_fnv_test::TestData testDataVec[] = { { "", 0x811c9dc5UL, 0xcbf29ce484222325ULL }, { "a", 0xe40c292cUL, 0xaf63dc4c8601ec8cULL }, { "b", 0xe70c2de5UL, 0xaf63df4c8601f1a5ULL }, { "c", 0xe60c2c52UL, 0xaf63de4c8601eff2ULL }, { "d", 0xe10c2473UL, 0xaf63d94c8601e773ULL }, { "e", 0xe00c22e0UL, 0xaf63d84c8601e5c0ULL } }; for (const auto& testData : testDataVec) { cpp::hash32_fnv_1a hash32; cpp::hash64_fnv_1a hash64; const auto byteSeqLen = std::strlen(testData.byteSeq); QCOMPARE(hash32(testData.byteSeq), testData.hash32_fnv_1a); QCOMPARE(hash32(testData.byteSeq, byteSeqLen), testData.hash32_fnv_1a); QCOMPARE(hash32(testData.byteSeq, testData.byteSeq + byteSeqLen), testData.hash32_fnv_1a); QCOMPARE(hash64(testData.byteSeq), testData.hash64_fnv_1a); QCOMPARE(hash64(testData.byteSeq, byteSeqLen), testData.hash64_fnv_1a); QCOMPARE(hash64(testData.byteSeq, testData.byteSeq + byteSeqLen), testData.hash64_fnv_1a); } }
/** * Adds an arbitrary object to be counted. Any object type can be used, * and there are no restrictions as long as std::hash<T> can be used to * obtain a hash value. */ void add(const T& t, size_t count = 1) { // we use std::hash first, to bring it to a 64-bit number size_t i = hash64(std::hash<T>()(t)); for (size_t j = 0; j < num_hash; ++j) { size_t bin = hash64(seeds[j] ^ i) % num_bins; // TODO: bit mask counts[j][bin] += count; } }
void add(const T& t) { // we use std::hash first, to bring it to a 64-bit number // Then cityhash's hash64 twice to distribute the hash. // empirically, one hash64 does not produce enough scattering to // get a good estimate size_t h = hash64(hash64(std::hash<T>()(t))); size_t index = h >> (64 - m_b); DASSERT_LT(index, m_buckets.size()); unsigned char pos = h != 0 ? 1 + __builtin_clz(h) : sizeof(size_t); m_buckets[index] = std::max(m_buckets[index], pos); }
/** * Returns the estimate of the frequency for a given object. */ inline size_t estimate(const T& t) { size_t E = std::numeric_limits<size_t>::max(); size_t i = hash64(std::hash<T>()(t)); // Compute the minimum value across hashes. for (size_t j = 0; j < num_hash; ++j) { size_t bin = hash64(seeds[j] ^ i) % num_bins; if (counts[j][bin] < E) E = counts[j][bin]; } return E; }
/** * Adds an arbitrary object to be counted. Any object type can be used, * and there are no restrictions as long as std::hash<T> can be used to * obtain a hash value. * * Note: * Theoretical properties only apply to the situation where count is 1. */ void add(const T& t, size_t count = 1) { // Create a 64-bit number from the object size_t i = hash64(std::hash<T>()(t)); for (size_t j = 0; j < num_hash; ++j) { // convert trailing bit to 1 or -1 counter_int s = (counter_int)( hash64(seeds_binary[j] ^ i) & 1); s = 2*s - 1; // compute which bin to increment size_t bin = hash64(seeds[j] ^ i) % num_bins; // TODO: bit mask counts[j][bin] += s * (counter_int) count; } }
size_t rcpplambda_evaluator::make_lambda(const std::string& lambda_str) { size_t hash_key = hash64(lambda_str.c_str(), lambda_str.size()); std::vector<Rcpp::Function> fun_lst; std::vector<std::string> fun_names; std::vector<std::string> strs; boost::split(strs, lambda_str, boost::is_any_of("\n")); if (strs[0] != "") { std::string lib_to_load = "suppressMessages(lapply(" + strs[0] + ", require, character.only = TRUE))"; m_lambda_lib_hash[hash_key] = lib_to_load; } else { m_lambda_lib_hash[hash_key] = ""; } R_ptr->parseEvalQ("library('RApiSerialize')"); for (size_t i = 1; i < strs.size() - 1; i = i + 2) { fun_lst.push_back(Rcpp::Function(unserializeFromStr(strs[i]))); fun_names.push_back(strs[i + 1]); } m_lambda_hash[hash_key] = fun_lst; m_lambda_name_hash[hash_key] = fun_names; return hash_key; }
int32_t XmlNode::setCommentNode2 ( char *node ) { m_nodeId = TAG_COMMENT; m_isBreaking = false;//true; m_isVisible = false;//true; m_hasBackTag = false; m_hash = hash64 ( "![" , 2 , 0LL ); m_node = node; m_tagName = node + 1; m_tagNameLen = 2; // . compute node length // . TODO: do we have to deal with quotes???? // . TODO: what about nested comments? int32_t i; for ( i = 2 ; node[i] ; i++ ) { // look for ending of ]> like for <![if gt IE 6]> if ( node[i] !='>' ) continue; if ( node[i-1] ==']' ) break; // look for ending of --> like for <![endif]--> if ( node[i-1] == '-' && node[i-2] == '-' ) break; } // skip i over the >, if any (could be end of doc) if ( node[i] == '>' ) i++; m_nodeLen = i; return i; }
/*
 * Fill "buffer" with "size" bytes produced by repeatedly applying hash64()
 * to a shared seed.
 *
 * While the shared seed is zero it is (re)initialised from
 * _DkRandomBitsRead(); the lock is dropped around that call. The final
 * generator state is written back to the shared seed before returning.
 *
 * Returns the number of bytes written (== size).
 * NOTE(review): when _DkRandomBitsRead() fails, its negative int result is
 * returned through the size_t return type - callers should confirm how they
 * detect that.
 */
size_t _DkFastRandomBitsRead (void * buffer, size_t size)
{
    char * out = (char *) buffer;
    unsigned long state;
    size_t filled = 0;

    _DkInternalLock(&lock);
    state = seed;
    while (!seed) {
        /* seed not set yet: fetch one without holding the lock */
        _DkInternalUnlock(&lock);
        int ret = _DkRandomBitsRead(&state, sizeof(state));
        if (ret < 0)
            return ret;
        _DkInternalLock(&lock);
        seed = state;
    }

    do {
        if (filled + sizeof(state) <= size) {
            /* room for a whole word */
            *(unsigned long *) (out + filled) = state;
            filled += sizeof(state);
        } else {
            /* partial tail: copy only the bytes that still fit */
            const size_t tail = size - filled;
            for (size_t i = 0 ; i < tail ; i++)
                *(unsigned char *) (out + filled + i) =
                    ((unsigned char *) &state)[i];
            filled = size;
        }

        /* advance the generator, never leaving it at zero */
        do {
            state = hash64(state);
        } while (!state);
    } while (filled < size);

    seed = state;
    _DkInternalUnlock(&lock);
    return filled;
}
/*
 * Resizes a HashTable to have 'newsize' buckets.
 * This is called automatically when adding or removing items so that the
 * hash table keeps at a sensible scale.
 *
 * FIXME: Halving the size of the hash table is simply a matter of coalescing
 * every other bucket. Instead we currently rehash (which is slower).
 * Doubling the size of the hash table currently requires rehashing, but this
 * too could be optimised by storing the full 32-bit hash of the key along
 * with the key itself. This then means that it's just a matter of seeing what
 * the next significant bit is. It's a memory vs speed tradeoff though and
 * re-hashing is pretty quick.
 *
 * On failure 'h' is left untouched.
 *
 * Returns 0 for success
 *        -1 for failure
 */
int HashTableResize(HashTable *h, int newsize) {
    HashTable *h2;
    int i;

    /* fprintf(stderr, "Resizing to %d\n", newsize); */

    /* Create a new hash table and rehash everything into it */
    h2 = HashTableCreate(newsize, h->options);
    if (!h2)
        return -1; /* could not build the replacement table */

    for (i = 0; i < h->nbuckets; i++) {
        HashItem *hi, *next;
        for (hi = h->bucket[i]; hi; hi = next) {
            uint64_t hv = hash64(h2->options & HASH_FUNC_MASK,
                                 (uint8_t *)hi->key, hi->key_len) & h2->mask;
            /* remember the successor before relinking this item */
            next = hi->next;
            hi->next = h2->bucket[hv];
            h2->bucket[hv] = hi;
        }
    }

    /* Swap the links over & free */
    free(h->bucket);
    h->bucket   = h2->bucket;
    h->nbuckets = h2->nbuckets;
    h->mask     = h2->mask;
    free(h2);

    return 0;
}
int32_t XmlNode::setCommentNode ( char *node ) { m_nodeId = TAG_COMMENT; m_isBreaking = true; m_isVisible = true; m_hasBackTag = false; m_hash = hash64 ( "!--" , 3 , 0LL ); m_node = node; m_tagName = node + 1; // !-- m_tagNameLen = 3; // . compute node length // . TODO: do we have to deal with quotes???? // . TODO: what about nested comments? int32_t i; for ( i = 3 ; node[i] ; i++ ) { if ( node[i] !='>' ) continue; if ( node[i-1] !='-' ) continue; if ( node[i-2] =='-' ) break; } // skip i over the >, if any (could be end of doc) if ( node[i] == '>' ) i++; m_nodeLen = i; return i; }
bool GDI2FT_RENDERER::fetch_glyph_run( bool is_glyph_index, bool is_pdy, LPCWSTR lpString, int c, CONST INT* lpDx, GDI2FT_GLPYH_RUN& glyph_run ) /* -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- */ { HASH_VALUE erased_trait; if( glyph_cache.glyph_run_lru.access( font_trait, erased_trait ) ) glyph_cache.erase_font_trait( erased_trait ); #ifdef _M_X64 const HASH_VALUE str_hash = hash64( lpString, c * sizeof( WCHAR ), is_glyph_index ); #else const HASH_VALUE str_hash = hash32( lpString, c * sizeof( WCHAR ), is_glyph_index ); #endif // _M_X64 if( !glyph_cache.lookup_glyph_run( font_trait, str_hash, glyph_run ) ) { GDI2FT_MUTEX mutex( GDI2FT_MUTEX::MUTEX_GLYPH_RUN_CACHE ); if( !glyph_cache.lookup_glyph_run( font_trait, str_hash, glyph_run ) ) { if( render( is_glyph_index, is_pdy, lpString, c, lpDx, glyph_run ) == 0 ) return false; glyph_cache.store_glyph_run( font_trait, str_hash, glyph_run ); } } return true; }
//make a cache key for a request int64_t Msg20Request::makeCacheKey() const { SafeBuf hash_buffer; hash_buffer.pushLong(m_numSummaryLines); hash_buffer.pushLong(m_getHeaderTag); hash_buffer.pushLongLong(m_docId); hash_buffer.pushLong(m_titleMaxLen); hash_buffer.pushLong(m_summaryMaxLen); hash_buffer.pushLong(m_summaryMaxNumCharsPerLine); hash_buffer.pushLong(m_collnum); hash_buffer.pushLong(m_highlightQueryTerms); hash_buffer.pushLong(m_getSummaryVector); hash_buffer.pushLong(m_showBanned); hash_buffer.pushLong(m_includeCachedCopy); hash_buffer.pushLong(m_doLinkSpamCheck); hash_buffer.pushLong(m_isLinkSpam); hash_buffer.pushLong(m_isSiteLinkInfo); hash_buffer.pushLong(m_getLinkInfo); hash_buffer.pushLong(m_onlyNeedGoodInlinks); hash_buffer.pushLong(m_getLinkText); hash_buffer.safeMemcpy(ptr_qbuf,size_qbuf); hash_buffer.safeMemcpy(ptr_ubuf,size_ubuf); hash_buffer.safeMemcpy(ptr_linkee,size_linkee); hash_buffer.safeMemcpy(ptr_displayMetas,size_displayMetas); int64_t h = hash64(hash_buffer.getBufStart(), hash_buffer.length()); return h; }
/*
 * Look up the cached inode whose "real" field equals the given value.
 * Walks the hash chain selected by hash64(real); returns NULL when no
 * entry on that chain matches.
 */
struct cache_inode *
search_cache_inode(struct sfs_fs *sfs, ino_t real) {
    struct cache_inode *ci;
    for (ci = sfs->inodes[hash64(real)]; ci != NULL; ci = ci->hash_next) {
        if (ci->real == real) {
            break;
        }
    }
    return ci;
}
//this one returns all the 7 hashes //maybe use xorshift instead, for faster hash compute hash_set_t operator () (const Item& key) { hash_set_t hset; for(size_t ii=0;ii<10; ii++) { hset[ii] = hash64 (key, _seed_tab[ii]); } return hset; }
static struct cache_inode * alloc_cache_inode(struct sfs_fs *sfs, ino_t real, uint32_t ino, uint16_t type) { struct cache_inode *ci = safe_malloc(sizeof(struct cache_inode)); ci->ino = (ino != 0) ? ino : sfs_alloc_ino(sfs); ci->real = real, ci->nblks = 0, ci->l1 = ci->l2 = NULL; struct inode *inode = &(ci->inode); memset(inode, 0, sizeof(struct inode)); inode->type = type; struct cache_inode **head = sfs->inodes + hash64(real); ci->hash_next = *head, *head = ci; return ci; }
// . dddddddd dddddddd dddddddd dddddddd d = domain hash w/o collection // . uuuuuuuu uuuuuuuu uuuuuuuu uuuuuuuu u = url hash // . uuuuuuuu uuuuuuuu uuuuuuuu uuuuuuuu key_t Catdb::makeKey ( Url *site, bool isDelete ) { key_t k; // . get startKey based on "site"'s domain // . if "site"'s domain is an ip address (non-canonical) then use ip getKeyRange ( site->isIp() , site , &k , NULL); // set lower 64 bits of key to hash of this url k.n0 = hash64 ( site->getUrl() , site->getUrlLen() ); // clear low bit if we're a delete, otherwise set it if ( isDelete ) k.n0 &= 0xfffffffffffffffeLL; else k.n0 |= 0x0000000000000001LL; return k; }
// . is "s" an HTML entity? (ascii representative of an iso char) // . return the 32-bit unicode char it represents // . returns 0 if none // . JAB: const-ness for optimizer... static const Entity *getTextEntity ( const char *s , int32_t len ) { if ( !initEntityTable()) return 0; // take the ; off, if any if ( s[len-1] == ';' ) len--; // compute the hash of the entity including &, but not ; int64_t h = hash64 ( s , len ); // get the entity index from table (stored in the score field) int32_t i = (int32_t) s_table.getScore(h); // return 0 if no match if ( i == 0 ) return NULL; // point to the utf8 char. these is 1 or 2 bytes it seems return s_entities+i-1; }
/**
 * Estimates how often an object has been added.
 *
 * Each of the num_hash rows yields one signed estimate (the selected bin's
 * value multiplied by the object's sign for that row); the median of those
 * per-row estimates is returned.
 *
 * \param t  object to look up; std::hash<T> must be usable on it
 */
inline counter_int estimate(const T& t) {
  // Reduce the object to a single scrambled integer.
  const size_t item_hash = hash64(std::hash<T>()(t));

  // Gather one estimate per row.
  std::vector<counter_int> per_row;
  for (size_t row = 0; row < num_hash; ++row) {
    // low bit 0 -> sign -1, low bit 1 -> sign +1
    const counter_int low_bit =
        (counter_int)(hash64(seeds_binary[row] ^ item_hash) & 1);
    const counter_int sign = 2 * low_bit - 1;

    // bin holding this object's count in this row
    // TODO: bit mask
    const size_t column = hash64(seeds[row] ^ item_hash) % num_bins;

    per_row.push_back(sign * counts[row][column]);
  }

  // Return the median
  const auto middle = per_row.begin() + per_row.size() / 2;
  std::nth_element(per_row.begin(), middle, per_row.end());
  return *middle;
}
/*
 * Hash "sz" bytes of raw data at "b".
 * Sizes 1, 2 and 4 are read as a signed integer of that width and passed to
 * int32hash(); size 8 goes through hash64(); every other size is hashed as a
 * byte buffer with memhash() or memhash32(), depending on _P64.
 */
static uptrint_t bits_hash(void *b, size_t sz)
{
    if (sz == 1)
        return int32hash(*(int8_t*)b);
    if (sz == 2)
        return int32hash(*(int16_t*)b);
    if (sz == 4)
        return int32hash(*(int32_t*)b);
    if (sz == 8)
        return hash64(*(int64_t*)b);

    /* any other width: generic byte-wise hash */
#ifdef _P64
    return memhash((char*)b, sz);
#else
    return memhash32((char*)b, sz);
#endif
}
// . is "s" an HTML entity? (ascii representative of an iso char) // . return the 32-bit unicode char it represents // . returns 0 if none // . JAB: const-ness for optimizer... uint32_t getTextEntity ( const char *s , int32_t len ) { if ( !initEntityTable()) return 0; // take the ; off, if any if ( s[len-1] == ';' ) len--; // compute the hash of the entity including &, but not ; int64_t h = hash64 ( s , len ); // get the entity index from table (stored in the score field) int32_t i = (int32_t) s_table.getScore ( &h ); // return 0 if no match if ( i == 0 ) return 0; // point to the utf8 char. these is 1 or 2 bytes it seems char *p = (char *)s_entities[i-1].utf8; // encode into unicode uint32_t c = utf8Decode ( p ); // return that return c; }
// Binds the renderer to a context and derives font_trait, a hash that
// identifies the context's logical font.
//
// Only part of log_font is hashed: everything except the lfFaceName array,
// plus as many bytes of lfFaceName as the face name actually occupies
// (terminator excluded).
// NOTE(review): hashing one contiguous prefix assumes lfFaceName is the
// trailing member of log_font - confirm against the LOGFONT definition.
GDI2FT_RENDERER::GDI2FT_RENDERER( const GDI2FT_CONTEXT& _context )
{
	context = &_context;
	render_mode = FT_RENDER_MODE_LCD;
	char_extra = GetTextCharacterExtra( context->hdc );

	// bytes taken by the face name characters
	const int face_name_bytes = static_cast<int>( wcslen( context->log_font.lfFaceName ) * sizeof( wchar_t ) );
	// bytes of log_font outside the face name array
	const int fixed_bytes = sizeof( context->log_font ) - sizeof( context->log_font.lfFaceName );
	const int hashed_bytes = fixed_bytes + face_name_bytes;

#ifdef _M_X64
	font_trait = hash64( &context->log_font, hashed_bytes, 0 );
#else
	font_trait = hash32( &context->log_font, hashed_bytes, 0 );
#endif // _M_X64
}
/*
 * Compute an identity hash for a value.
 *
 *  - symbols return their stored hash
 *  - bits-type values hash their payload mixed with a hash of their type
 *  - union kinds hash field 0 and xor in a constant
 *  - struct-typed values hash the pointer itself
 *  - anything else must be a tuple (asserted); its element ids are folded
 *    together with bitmix()
 */
DLLEXPORT uptrint_t jl_object_id(jl_value_t *v)
{
    if (jl_is_symbol(v))
        return ((jl_sym_t*)v)->hash;

    jl_value_t *tv = (jl_value_t*)jl_typeof(v);

    if (jl_is_bits_type(tv)) {
        size_t nb = jl_bitstype_nbits(tv)/8;
        uptrint_t type_hash = inthash((uptrint_t)tv);
        char *data = (char*)jl_bits_data(v);

        /* 1/2/4/8-byte payloads are read as integers, xor-ed with the type hash */
        if (nb == 1)
            return int32hash(*(int8_t*)data ^ type_hash);
        if (nb == 2)
            return int32hash(*(int16_t*)data ^ type_hash);
        if (nb == 4)
            return int32hash(*(int32_t*)data ^ type_hash);
        if (nb == 8)
            return hash64(*(int64_t*)data ^ type_hash);

        /* other widths: hash the raw bytes, then xor in the type hash */
#ifdef __LP64__
        return type_hash ^ memhash(data, nb);
#else
        return type_hash ^ memhash32(data, nb);
#endif
    }

    if (tv == (jl_value_t*)jl_union_kind) {
#ifdef __LP64__
        return jl_object_id(jl_fieldref(v,0))^0xA5A5A5A5A5A5A5A5L;
#else
        return jl_object_id(jl_fieldref(v,0))^0xA5A5A5A5;
#endif
    }

    if (jl_is_struct_type(tv))
        return inthash((uptrint_t)v);

    assert(jl_is_tuple(v));

    uptrint_t acc = 0;
    size_t len = jl_tuple_len(v);
    for(size_t k = 0; k < len; k++) {
        uptrint_t elem_id = jl_object_id(jl_tupleref(v,k));
        acc = bitmix(acc, elem_id);
    }
    return acc;
}
/*
 * Compute the checksums of an object's buffer and store them in *res.
 *
 * The length and token count come from fmt_stats(); hash64() and crc32()
 * are both run over the first stats.dumplen bytes of obj->js.
 *
 * Returns LIBFMT_ERR_OBJ_INVALID when obj, obj->js or res is null,
 *         LIBFMT_ERR_GENERIC     when fmt_stats() reports failure,
 *         LIBFMT_ERR_NONE        on success.
 */
int fmt_cscalc( fmt_t *obj, fmt_checksum_t *res )
{
	fmt_stats_t stats;

	if ( !obj )
		return LIBFMT_ERR_OBJ_INVALID;
	if ( !obj->js )
		return LIBFMT_ERR_OBJ_INVALID;
	if ( !res )
		return LIBFMT_ERR_OBJ_INVALID;

	if ( fmt_stats( obj, &stats ) != 0 )
		return LIBFMT_ERR_GENERIC;

	res->buflen   = stats.dumplen;
	res->tok_used = stats.tok_used;
	res->siphash  = hash64( obj->js, stats.dumplen );
	res->crc      = crc32( obj->js, stats.dumplen );

	/* TODO: per pair (key:value) recursive hashing and then mix (xor?)
	 * them all to enable object location independent checksum */
	return LIBFMT_ERR_NONE;
}
// . is "s" an HTML entity? (ascii representative of an iso char) // . return the 32-bit unicode char it represents // . returns 0 if none // . JAB: const-ness for optimizer... uint32_t getTextEntity ( char *s , int32_t len ) { if ( !initEntityTable()) return 0; // take the ; off, if any if ( s[len-1] == ';' ) len--; // compute the hash of the entity including &, but not ; int64_t h = hash64 ( s , len ); // get the entity index from table (stored in the score field) int32_t i = (int32_t) s_table.getScore ( &h ); // return 0 if no match if ( i == 0 ) return 0; // point to the utf8 char. these is 1 or 2 bytes it seems char *p = (char *)s_entities[i-1].utf8; // encode into unicode uint32_t c = utf8Decode ( p ); // return that return c; // return the iso character //printf("Converted text entity \""); //for(int si=0;si<len;si++)putchar(s[si]); //printf("\" to 0x%x(%d)\"%c\"\n",s_entities[i-1].c,s_entities[i-1].c, // s_entities[i-1].c); //return (uint32_t)s_entities[i-1].c; }
int32_t XmlNode::setCDATANode ( char *node ) { m_nodeId = TAG_CDATA; m_isBreaking = true; m_isVisible = true; m_hasBackTag = false; m_hash = hash64 ( "![CDATA[" , 8 , 0LL ); m_node = node; m_tagName = node + 1; // !-- m_tagNameLen = 8; // . compute node length // . TODO: do we have to deal with quotes???? // . TODO: what about nested comments? int32_t i; for ( i = 8 ; node[i] ; i++ ) { // seems like just ]] is good enough! don't need "]]>" //if ( node[i] !='>' ) continue; if ( node[i ] !=']' ) continue; if ( node[i+1] !=']' ) continue;//{ i++; break; } // but skip it if we got it if ( node[i+2] !='>' ) continue; //if ( node[i+2] == '>' ) { i+=3; break;} i += 3; break; // if does not end in '>', skip the ']' anyway // no! hurts regex ending in [0-9] //i+=2; break; } // skip i over the >, if any (could be end of doc) //if ( node[i] == '>' ) i++; m_nodeLen = i; return i; }
// Round-trips "src" through a directory archive under "dir" and loads the
// result into "dest".
//
// A hashed random number is written right after the object and read back
// after it; the final assertion checks that the value read back equals the
// value written.
void _save_and_load_object(T& dest, const U& src, std::string dir) {
  // Make the scratch directory and register it with the deleter
  boost::filesystem::create_directory(dir);
  _add_directory_to_deleter(dir);

  std::string archive_path = dir + "/test_archive";
  uint64_t sentinel = hash64(random::fast_uniform<size_t>(0,size_t(-1)));

  // Write: object first, sentinel second
  dir_archive writer;
  writer.open_directory_for_write(archive_path);
  graphlab::oarchive oarc(writer);
  oarc << src << sentinel;
  writer.close();

  // Read back in the same order
  dir_archive reader;
  reader.open_directory_for_read(archive_path);
  graphlab::iarchive iarc(reader);
  uint64_t recovered;
  iarc >> dest;
  iarc >> recovered;
  reader.close();

  ASSERT_EQ(recovered, sentinel);
}
// Small driver that prints several hashes so they can be compared by eye:
//   h1, h2 : hash64Lower of "Apple" and of "Candy"
//   h3     : hash64Lower of "AppleCandy"
//   h4     : hash64Lower of "Candy" with h1 passed as the third argument
//   h5     : h1 xor h2
//   h6     : hash64(h1, h2)
int main(int argc, char **argv) {
	char *apple      = "Apple";
	char *candy      = "Candy";
	char *appleCandy = "AppleCandy";

	hashinit();

	long long h1 = hash64Lower(apple, gbstrlen(apple));
	long long h2 = hash64Lower(candy, gbstrlen(candy));
	printf("h1: %lld, h2: %lld\n", h1, h2);

	long long h3 = hash64Lower(appleCandy, gbstrlen(appleCandy));
	long long h4 = hash64Lower(candy, gbstrlen(candy),h1);
	printf("h3: %lld, h4: %lld\n", h3,h4);

	long long h5 = h1^h2;
	printf("h5: %lld\n", h5);

	long long h6 = hash64(h1,h2);
	printf("h6: %lld\n", h6);
}
// . combine a 32-bit hash with a 128-bit hash
// . each 64-bit half of "h2" is passed through hash64() together with "h1"
u_int128_t hash128 ( int32_t h1 , u_int128_t h2 ) {
	u_int128_t mixed = h2;
	mixed.n0 = hash64 ( h1 , h2.n0 );
	mixed.n1 = hash64 ( h1 , h2.n1 );
	return mixed;
}
// . combine two 128-bit hashes
// . matching 64-bit halves of "h1" and "h2" are combined with hash64()
u_int128_t hash128 ( u_int128_t h1 , u_int128_t h2 ) {
	u_int128_t mixed = h1;
	mixed.n0 = hash64 ( h1.n0 , h2.n0 );
	mixed.n1 = hash64 ( h1.n1 , h2.n1 );
	return mixed;
}