void CacheHash::Feed(uint32_t aVal, uint8_t aLen) { switch (mPos) { case 0: mA += aVal; mPos ++; break; case 1: mB += aVal; mPos ++; break; case 2: mPos = 0; if (aLen == 4) { mC += aVal; hashmix(mA, mB, mC); } else { mC += aVal << 8; } } mLength += aLen; }
/* -------------------------------------------------------------------- This works on all machines. To be useful, it requires -- that the key be an array of uint32_t's, and -- that the length be the number of uint32_t's in the key The function hashword() is identical to hashlittle() on little-endian machines, and identical to hashbig() on big-endian machines, except that the length has to be measured in uint32_ts rather than in bytes. hashlittle() is more complicated than hashword() only because hashlittle() has to dance around fitting the key bytes into registers. -------------------------------------------------------------------- */ uint32_t hashword( const uint32_t *k, /* the key, an array of uint32_t values */ size_t length, /* the length of the key, in uint32_ts */ uint32_t initval) /* the previous hash, or an arbitrary value */ { uint32_t a,b,c; /* Set up the internal state */ a = b = c = 0xdeadbeef + (((uint32_t)length)<<2) + initval; /*------------------------------------------------- handle most of the key */ while (length > 3) { a += k[0]; b += k[1]; c += k[2]; hashmix(a,b,c); length -= 3; k += 3; } /*------------------------------------------- handle the last 3 uint32_t's */ switch(length) /* all the case statements fall through */ { case 3 : c+=k[2]; case 2 : b+=k[1]; case 1 : a+=k[0]; hashfinal(a,b,c); case 0: /* case 0: nothing left to add */ break; } /*------------------------------------------------------ report the result */ return c; }
CacheHash::Hash32_t CacheHash::GetHash() { if (!mFinalized) { if (mBufPos) { Feed(mBuf, mBufPos); } mC += mLength; hashmix(mA, mB, mC); mFinalized = true; } return mC; }
/* -------------------------------------------------------------------- hashword2() -- same as hashword(), but take two seeds and return two 32-bit values. pc and pb must both be nonnull, and *pc and *pb must both be initialized with seeds. If you pass in (*pb)==0, the output (*pc) will be the same as the return value from hashword(). -------------------------------------------------------------------- */ void hashword2 ( const uint32_t *k, /* the key, an array of uint32_t values */ size_t length, /* the length of the key, in uint32_ts */ uint32_t *pc, /* IN: seed OUT: primary hash value */ uint32_t *pb) /* IN: more seed OUT: secondary hash value */ { uint32_t a,b,c; /* Set up the internal state */ a = b = c = 0xdeadbeef + ((uint32_t)(length<<2)) + *pc; c += *pb; /*------------------------------------------------- handle most of the key */ while (length > 3) { a += k[0]; b += k[1]; c += k[2]; hashmix(a,b,c); length -= 3; k += 3; } /*------------------------------------------- handle the last 3 uint32_t's */ switch(length) /* all the case statements fall through */ { case 3 : c+=k[2]; case 2 : b+=k[1]; case 1 : a+=k[0]; hashfinal(a,b,c); case 0: /* case 0: nothing left to add */ break; } /*------------------------------------------------------ report the result */ *pc=c; *pb=b; }
/* * hashbig(): * This is the same as hashword() on big-endian machines. It is different * from hashlittle() on all machines. hashbig() takes advantage of * big-endian byte ordering. */ uint32_t hashbig( const void *key, size_t length, uint32_t initval) { uint32_t a,b,c; union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */ /* Set up the internal state */ a = b = c = 0xdeadbeef + ((uint32_t)length) + initval; u.ptr = key; if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) { const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */ /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */ while (length > 12) { a += k[0]; b += k[1]; c += k[2]; hashmix(a,b,c); length -= 12; k += 3; } /*----------------------------- handle the last (probably partial) block */ /* * "k[2]<<8" actually reads beyond the end of the string, but * then shifts out the part it's not allowed to read. Because the * string is aligned, the illegal read is in the same word as the * rest of the string. Every machine with memory protection I've seen * does it on word boundaries, so is OK with this. But VALGRIND will * still catch it and complain. The masking trick does make the hash * noticably faster for short strings (like English words). */ #ifndef VALGRIND switch(length) { case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break; case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break; case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break; case 8 : b+=k[1]; a+=k[0]; break; case 7 : b+=k[1]&0xffffff00; a+=k[0]; break; case 6 : b+=k[1]&0xffff0000; a+=k[0]; break; case 5 : b+=k[1]&0xff000000; a+=k[0]; break; case 4 : a+=k[0]; break; case 3 : a+=k[0]&0xffffff00; break; case 2 : a+=k[0]&0xffff0000; break; case 1 : a+=k[0]&0xff000000; break; case 0 : return c; /* zero length strings require no hashmixing */ } #else /* make valgrind happy */ { const uint8_t *k8 = (const uint8_t *)k; switch(length) /* all the case statements fall through */ { case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; case 11: c+=((uint32_t)k8[10])<<8; /* fall through */ case 10: c+=((uint32_t)k8[9])<<16; /* fall through */ case 9 : c+=((uint32_t)k8[8])<<24; /* fall through */ case 8 : b+=k[1]; a+=k[0]; break; case 7 : b+=((uint32_t)k8[6])<<8; /* fall through */ case 6 : b+=((uint32_t)k8[5])<<16; /* fall through */ case 5 : b+=((uint32_t)k8[4])<<24; /* fall through */ case 4 : a+=k[0]; break; case 3 : a+=((uint32_t)k8[2])<<8; /* fall through */ case 2 : a+=((uint32_t)k8[1])<<16; /* fall through */ case 1 : a+=((uint32_t)k8[0])<<24; break; case 0 : return c; } } #endif /* !VALGRIND */ } else { /* need to read the key one byte at a time */ const uint8_t *k = (const uint8_t *)key; /*--------------- all but the last block: affect some 32 bits of (a,b,c) */ while (length > 12) { a += ((uint32_t)k[0])<<24; a += ((uint32_t)k[1])<<16; a += ((uint32_t)k[2])<<8; a += ((uint32_t)k[3]); b += ((uint32_t)k[4])<<24; b += ((uint32_t)k[5])<<16; b += ((uint32_t)k[6])<<8; b += ((uint32_t)k[7]); c += ((uint32_t)k[8])<<24; c += ((uint32_t)k[9])<<16; c += ((uint32_t)k[10])<<8; c += ((uint32_t)k[11]); hashmix(a,b,c); length -= 12; k += 12; } /*-------------------------------- last block: affect all 32 bits of (c) */ switch(length) /* all the case statements fall through */ { case 12: c+=k[11]; case 11: c+=((uint32_t)k[10])<<8; case 10: c+=((uint32_t)k[9])<<16; case 9 : c+=((uint32_t)k[8])<<24; case 8 : b+=k[7]; case 7 : b+=((uint32_t)k[6])<<8; case 6 : b+=((uint32_t)k[5])<<16; case 5 : b+=((uint32_t)k[4])<<24; case 4 : a+=k[3]; case 3 : a+=((uint32_t)k[2])<<8; case 2 : a+=((uint32_t)k[1])<<16; case 1 : a+=((uint32_t)k[0])<<24; break; case 0 : return c; } } hashfinal(a,b,c); return c; }
/* * hashlittle2: return 2 32-bit hash values * * This is identical to hashlittle(), except it returns two 32-bit hash * values instead of just one. This is good enough for hash table * lookup with 2^^64 buckets, or if you want a second hash if you're not * happy with the first, or if you want a probably-unique 64-bit ID for * the key. *pc is better hashmixed than *pb, so use *pc first. If you want * a 64-bit value do something like "*pc + (((uint64_t)*pb)<<32)". */ void hashlittle2( const void *key, /* the key to hash */ size_t length, /* length of the key */ uint32_t *pc, /* IN: primary initval, OUT: primary hash */ uint32_t *pb) /* IN: secondary initval, OUT: secondary hash */ { uint32_t a,b,c; /* internal state */ union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */ /* Set up the internal state */ a = b = c = 0xdeadbeef + ((uint32_t)length) + *pc; c += *pb; u.ptr = key; if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) { const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */ /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */ while (length > 12) { a += k[0]; b += k[1]; c += k[2]; hashmix(a,b,c); length -= 12; k += 3; } /*----------------------------- handle the last (probably partial) block */ /* * "k[2]&0xffffff" actually reads beyond the end of the string, but * then masks off the part it's not allowed to read. Because the * string is aligned, the masked-off tail is in the same word as the * rest of the string. Every machine with memory protection I've seen * does it on word boundaries, so is OK with this. But VALGRIND will * still catch it and complain. The masking trick does make the hash * noticably faster for short strings (like English words). */ #ifndef VALGRIND switch(length) { case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break; case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break; case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break; case 8 : b+=k[1]; a+=k[0]; break; case 7 : b+=k[1]&0xffffff; a+=k[0]; break; case 6 : b+=k[1]&0xffff; a+=k[0]; break; case 5 : b+=k[1]&0xff; a+=k[0]; break; case 4 : a+=k[0]; break; case 3 : a+=k[0]&0xffffff; break; case 2 : a+=k[0]&0xffff; break; case 1 : a+=k[0]&0xff; break; case 0 : *pc=c; *pb=b; return; /* zero length strings require no hashmixing */ } #else /* make valgrind happy */ { const uint8_t *k8 = (const uint8_t *)k; switch(length) { case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ case 10: c+=((uint32_t)k8[9])<<8; /* fall through */ case 9 : c+=k8[8]; /* fall through */ case 8 : b+=k[1]; a+=k[0]; break; case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */ case 5 : b+=k8[4]; /* fall through */ case 4 : a+=k[0]; break; case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */ case 1 : a+=k8[0]; break; case 0 : *pc=c; *pb=b; return; /* zero length strings require no hashmixing */ } } #endif /* !valgrind */ } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) { const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */ const uint8_t *k8; /*--------------- all but last block: aligned reads and different hashmixing */ while (length > 12) { a += k[0] + (((uint32_t)k[1])<<16); b += k[2] + (((uint32_t)k[3])<<16); c += k[4] + (((uint32_t)k[5])<<16); hashmix(a,b,c); length -= 12; k += 6; } /*----------------------------- handle the last (probably partial) block */ k8 = (const uint8_t *)k; switch(length) { case 12: c+=k[4]+(((uint32_t)k[5])<<16); b+=k[2]+(((uint32_t)k[3])<<16); a+=k[0]+(((uint32_t)k[1])<<16); break; case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ case 10: c+=k[4]; b+=k[2]+(((uint32_t)k[3])<<16); a+=k[0]+(((uint32_t)k[1])<<16); break; case 9 : c+=k8[8]; /* fall through */ case 8 : b+=k[2]+(((uint32_t)k[3])<<16); a+=k[0]+(((uint32_t)k[1])<<16); break; case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ case 6 : b+=k[2]; a+=k[0]+(((uint32_t)k[1])<<16); break; case 5 : b+=k8[4]; /* fall through */ case 4 : a+=k[0]+(((uint32_t)k[1])<<16); break; case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ case 2 : a+=k[0]; break; case 1 : a+=k8[0]; break; case 0 : *pc=c; *pb=b; return; /* zero length strings require no hashmixing */ } } else { /* need to read the key one byte at a time */ const uint8_t *k = (const uint8_t *)key; /*--------------- all but the last block: affect some 32 bits of (a,b,c) */ while (length > 12) { a += k[0]; a += ((uint32_t)k[1])<<8; a += ((uint32_t)k[2])<<16; a += ((uint32_t)k[3])<<24; b += k[4]; b += ((uint32_t)k[5])<<8; b += ((uint32_t)k[6])<<16; b += ((uint32_t)k[7])<<24; c += k[8]; c += ((uint32_t)k[9])<<8; c += ((uint32_t)k[10])<<16; c += ((uint32_t)k[11])<<24; hashmix(a,b,c); length -= 12; k += 12; } /*-------------------------------- last block: affect all 32 bits of (c) */ switch(length) /* all the case statements fall through */ { case 12: c+=((uint32_t)k[11])<<24; case 11: c+=((uint32_t)k[10])<<16; case 10: c+=((uint32_t)k[9])<<8; case 9 : c+=k[8]; case 8 : b+=((uint32_t)k[7])<<24; case 7 : b+=((uint32_t)k[6])<<16; case 6 : b+=((uint32_t)k[5])<<8; case 5 : b+=k[4]; case 4 : a+=((uint32_t)k[3])<<24; case 3 : a+=((uint32_t)k[2])<<16; case 2 : a+=((uint32_t)k[1])<<8; case 1 : a+=k[0]; break; case 0 : *pc=c; *pb=b; return; /* zero length strings require no hashmixing */ } } hashfinal(a,b,c); *pc=c; *pb=b; }