/**
 * Weighted furc (ch3) hash: picks a server index for `key`, honoring
 * per-server weights in [0.0, 1.0]. A candidate chosen by plain furc_hash
 * is accepted with probability equal to its weight; otherwise the key is
 * re-salted and hashed again, up to kNumTries attempts. With all weights
 * at 1.0 this degenerates to plain furc_hash.
 */
size_t WeightedCh3HashFunc::operator()(folly::StringPiece key) const {
  const auto poolSize = weights_.size();
  checkLogic(
      poolSize && poolSize <= furc_maximum_pool_size(),
      "Invalid pool size: {}",
      poolSize);

  const auto unsaltedKey = key;
  std::string rehashBuffer;
  size_t nextSalt = 0;
  size_t chosen = 0;

  for (size_t attempt = 0; attempt < kNumTries; ++attempt) {
    chosen = furc_hash(key.data(), key.size(), poolSize);

    /* The hash is 32-bit, but it is held in a 64-bit integer so the
       comparison against the scaled weight below cannot overflow. */
    const uint64_t point =
        folly::hash::SpookyHashV2::Hash32(key.data(), key.size(), kHashSeed);
    assert(0 <= weights_[chosen] && weights_[chosen] <= 1.0);
    const uint64_t threshold =
        weights_[chosen] * std::numeric_limits<uint32_t>::max();

    /* Accept the candidate unless the sampled point fell outside its
       weighted range. */
    if (LIKELY(point < threshold)) {
      return chosen;
    }

    /* Rejected: build a new key by appending the decimal digits of the
       salt (least-significant digit first) to the original key, then
       retry with the salted key. */
    auto digits = nextSalt++;
    rehashBuffer = unsaltedKey.str();
    do {
      rehashBuffer.push_back(char(digits % 10) + '0');
      digits /= 10;
    } while (digits > 0);
    key = rehashBuffer;
  }

  /* All retries exhausted: fall back to the last candidate. */
  return chosen;
}
// With every weight at 0.75, the weighted hash should agree with plain
// furc_hash for roughly 75% of keys. The exact agreement count is pinned
// for the fixed srand seed below so regressions are caught deterministically.
TEST(ch3, weighted_furc_hash_all_75pct) {
  char keyBuf[MAX_KEY_LENGTH + 1];
  srand(1234567);

  auto weights = std::make_unique<std::array<double, 10000>>();
  weights->fill(0.75);

  size_t agreements = 0;
  for (uint32_t poolSize = 5000; poolSize <= 10000; ++poolSize) {
    make_random_key(keyBuf, MAX_KEY_LENGTH);
    const int keyLen = strlen(keyBuf);

    const size_t classic = furc_hash(keyBuf, keyLen, poolSize);
    EXPECT_LT(classic, poolSize);

    folly::Range<const double*> weightRange(
        weights->cbegin(), weights->cbegin() + poolSize);
    const size_t weighted = facebook::mcrouter::weightedFurcHash(
        folly::StringPiece(keyBuf, keyLen), weightRange);
    EXPECT_LT(weighted, poolSize);

    if (classic == weighted) {
      ++agreements;
    }
  }

  // Empirically for the seed, it's 3723, which is roughly 75% of 5000, as
  // expected.
  EXPECT_EQ(3723, agreements);
}
// Benchmarks lookup throughput of naive modulo ("INconsistent") hashing
// versus furc (consistent) hashing over NUM_LOOKUPS random keys, printing
// total and per-query microseconds for each.
TEST(ch3, timing) {
  unsigned i;
  struct timeval lstart, lend;
  uint64_t start, end;
  // One contiguous buffer holding NUM_LOOKUPS NUL-terminated keys,
  // MAX_KEY_LENGTH + 1 bytes apart.
  std::vector<char> keys((MAX_KEY_LENGTH + 1) * NUM_LOOKUPS);
  char* keys_itr;

  printf("Servers:\t\t%d\n", NUM_SERVERS);
  printf("Lookups:\t\t%d\n", NUM_LOOKUPS);
  printf("Generating lookup keys...");
  fflush(stdout);
  srand(time(nullptr));
  for (i = 0, keys_itr = keys.data(); i < NUM_LOOKUPS;
       ++i, keys_itr += MAX_KEY_LENGTH + 1) {
    make_random_key(keys_itr, MAX_KEY_LENGTH);
  }
  printf(" done\n");

  printf("Starting INconsistent hashing timing tests...");
  fflush(stdout);
  gettimeofday(&lstart, nullptr);
  for (i = 0, keys_itr = keys.data(); i < NUM_LOOKUPS;
       ++i, keys_itr += MAX_KEY_LENGTH + 1) {
    uint32_t hash_code = crc32_hash(keys_itr, strlen(keys_itr));
    uint32_t server_num = inconsistent_hashing_lookup(hash_code, NUM_SERVERS);
    (void)server_num; /* to avoid compiler warning */
  }
  gettimeofday(&lend, nullptr);
  printf(" done\n");
  start = ((uint64_t)lstart.tv_sec) * 1000000 + lstart.tv_usec;
  end = ((uint64_t)lend.tv_sec) * 1000000 + lend.tv_usec;
  // FIX: (end - start) is uint64_t, but %zd expects ssize_t — undefined
  // behavior on platforms where size_t is not 64 bits. Cast to unsigned
  // long long and print with %llu instead.
  printf(
      "Lookup:\t\t\t%lluus total\t%0.3fus/query\n",
      (unsigned long long)(end - start),
      ((float)(end - start)) / NUM_LOOKUPS);

  printf("Starting consistent hashing timing tests...");
  fflush(stdout);
  gettimeofday(&lstart, nullptr);
  for (i = 0, keys_itr = keys.data(); i < NUM_LOOKUPS;
       ++i, keys_itr += MAX_KEY_LENGTH + 1) {
    auto res = furc_hash(keys_itr, strlen(keys_itr), NUM_SERVERS);
    EXPECT_LT(res, NUM_SERVERS);
  }
  gettimeofday(&lend, nullptr);
  printf(" done\n");
  start = ((uint64_t)lstart.tv_sec) * 1000000 + lstart.tv_usec;
  end = ((uint64_t)lend.tv_sec) * 1000000 + lend.tv_usec;
  printf(
      "Lookup:\t\t\t%lluus total\t%0.3fus/query\n",
      (unsigned long long)(end - start),
      ((float)(end - start)) / NUM_LOOKUPS);
}
// With every weight exactly 1.0 the weighted hash never rejects a
// candidate, so it must match plain furc_hash on every key and pool size.
TEST(ch3, weighted_furc_hash_all_one) {
  char keyBuf[MAX_KEY_LENGTH + 1];
  srand(12345);

  std::array<double, 1000> weights;
  weights.fill(1.0);

  for (uint32_t poolSize = 1; poolSize <= 1000; ++poolSize) {
    make_random_key(keyBuf, MAX_KEY_LENGTH);
    const int keyLen = strlen(keyBuf);

    const size_t expected = furc_hash(keyBuf, keyLen, poolSize);
    EXPECT_LT(expected, poolSize);

    folly::Range<const double*> weightRange(
        weights.cbegin(), weights.cbegin() + poolSize);
    const size_t actual = facebook::mcrouter::weightedFurcHash(
        folly::StringPiece(keyBuf, keyLen), weightRange);
    EXPECT_EQ(expected, actual);
  }
}
// HPHP extension entry point: clamps `len` to the valid range
// [0, key.size()] before delegating to furc_hash.
int64_t f_furchash_hphp_ext(CStrRef key, int len, int nPart) {
  const auto bounded = std::min(std::max(len, 0), key.size());
  return furc_hash(key, bounded, nPart);
}
/**
 * This verifies that
 * 1) the load is evenly balanced across servers.
 * 2) the act of adding a server to a pool will never result in a server
 * handling keyspace that it previously handled but no longer does.
 * If this occurs, then stale data may be returned.
 */
TEST(ch3, verify_correctness) {
  uint32_t i, j;
  uint32_t maximum_pool_size = furc_maximum_pool_size();
  char key[MAX_KEY_LENGTH + 1];
  // pools[k][s] counts how many sample keys hashed to server s in pool k.
  std::vector<uint64_t> pools[NUM_POOLS];
  uint32_t sizes[NUM_POOLS];
  size_t num_pools;
  // All weights fixed at 1.0: weightedFurcHash is expected to produce the
  // same result as plain furc_hash (checked per-key below).
  auto weights = std::make_unique<std::array<double, 1U << 23U>>();
  weights->fill(1.0);
  srand(time(nullptr));
  // Build a sequence of strictly growing pool sizes from 1 up to the
  // maximum, alternating geometric and arithmetic growth so both large
  // and small size increments are exercised.
  for (num_pools = 0; /* see end of loop */; ++num_pools) {
    if (num_pools == 0) {
      sizes[num_pools] = 1;
    } else if (num_pools == NUM_POOLS - 1) {
      sizes[num_pools] = maximum_pool_size;
    } else if (num_pools % 2 == 1) {
      // grow pool size geometrically
      sizes[num_pools] = sizes[num_pools - 1] * drand_in_range(1.5, 2.5);
    } else {
      // grow pool size arithmetically
      sizes[num_pools] = sizes[num_pools - 1] + rand_in_range(1, 11);
    }
    /* Make sure we don't exceed the maximum pool size.
     */
    if (sizes[num_pools] > maximum_pool_size) {
      sizes[num_pools] = maximum_pool_size;
    }
    pools[num_pools] = std::vector<uint64_t>(sizes[num_pools]);
    if (sizes[num_pools] == maximum_pool_size)
      break;
  }
  for (i = 0; i < NUM_SAMPLES; ++i) {
    // Sentinel: -1 wraps to SIZE_MAX, which can never equal a valid index.
    size_t previous_num = -1;
    int len;
    make_random_key(key, MAX_KEY_LENGTH);
    len = strlen(key);
    // hash the same key in each pool, in increasing pool size order
    for (j = 0; j < num_pools; ++j) {
      size_t num = furc_hash(key, len, sizes[j]);
      EXPECT_LT(num, sizes[j]);
      // Verify that the weighted furc yields identical result with weights
      // at 1
      assert(sizes[j] <= weights->size());
      folly::Range<const double*> weightRange(
          weights->cbegin(), weights->cbegin() + sizes[j]);
      size_t weighted = facebook::mcrouter::weightedFurcHash(
          folly::StringPiece(key, len), weightRange);
      EXPECT_EQ(num, weighted);
      ++pools[j][num];
      // make sure that this key either hashes the same server,
      // or hashes to a new server
      if (previous_num != num && j > 0) {
        EXPECT_GE(num, sizes[j - 1]);
      }
      previous_num = num;
    }
  }
  for (i = 0; i < num_pools; ++i) {
    /* Verify that load is evenly distributed. This isn't easy to do
       generally without significantly increasing the runtime by choosing
       a huge NUM_SAMPLES, so just check pools up to 1000 in size.
     */
    uint32_t pool_size = sizes[i];
    if (pool_size > 1000)
      break;
    double expected_mean = ((double)NUM_SAMPLES) / pool_size;
    double max_diff = 0;
    double sum = 0;
    for (j = 0; j < pool_size; j++) {
      double diff = std::abs(pools[i][j] - expected_mean);
      if (diff > max_diff)
        max_diff = diff;
      sum += pools[i][j];
    }
    double mean = sum / pool_size;
    // expect the sample mean to be within 5% of expected mean
    EXPECT_NEAR(mean, expected_mean, expected_mean * 0.05);
    // expect the maximum deviation from mean to be within 15%
    EXPECT_NEAR(max_diff, 0, mean * 0.15);
    sum = 0;
    for (j = 0; j < pool_size; j++) {
      double diff = pools[i][j] - mean;
      sum += diff * diff;
    }
    double stddev = sqrt(sum / pool_size);
    // expect the standard deviation to be < 5%
    EXPECT_NEAR(stddev, 0, mean * 0.05);
  }
}
// HHVM binding: clamps `len` to the valid range [0, key.size()] before
// hashing the key's bytes into one of `nPart` partitions via furc_hash.
int64_t HHVM_FUNCTION(furchash_hphp_ext, const String& key, int64_t len, int64_t nPart) {
  const int64_t bounded =
      std::min<int64_t>(std::max<int64_t>(len, 0), key.size());
  return furc_hash(key.data(), bounded, nPart);
}