size_t WeightedCh3HashFunc::operator()(folly::StringPiece key) const {
  const auto poolSize = weights_.size();
  checkLogic(
      poolSize && poolSize <= furc_maximum_pool_size(),
      "Invalid pool size: {}",
      poolSize);

  const auto originalKey = key;
  std::string rehashKey;
  size_t index = 0;

  for (size_t attempt = 0, salt = 0; attempt < kNumTries; ++attempt) {
    index = furc_hash(key.data(), key.size(), poolSize);

    /* 32-bit hash kept in a 64-bit integer so the comparison against the
       weight cutoff below cannot overflow. */
    const uint64_t point =
        folly::hash::SpookyHashV2::Hash32(key.data(), key.size(), kHashSeed);
    assert(0 <= weights_[index] && weights_[index] <= 1.0);
    const uint64_t cutoff =
        weights_[index] * std::numeric_limits<uint32_t>::max();

    /* Accept this server unless the hash point lands beyond the portion of
       the 32-bit range granted by its weight. */
    if (LIKELY(point < cutoff)) {
      return index;
    }

    /* Rejected: build a fresh key by appending the decimal digits of the
       current salt (least-significant digit first) to the original key, then
       retry. rehashKey outlives the StringPiece that aliases it. */
    auto s = salt++;
    rehashKey = originalKey.str();
    do {
      rehashKey.push_back(char(s % 10) + '0');
      s /= 10;
    } while (s > 0);
    key = rehashKey;
  }

  /* All tries exhausted; fall back to the last candidate index. */
  return index;
}
/**
 * Furc (consistent) hash of `key` onto the range [0, m).
 *
 * The pool size is treated as a binary tree of depth d (smallest d with
 * m <= 2^d).  Each try draws hash bits for the key to pick a leaf; if the
 * leaf is >= m the search retries (up to MAX_TRIES) so that the result is
 * always a valid index.  `hash`/`old_ord` form a per-call cache threaded
 * through every furc_get_bit invocation, so the exact order of those calls
 * is significant.
 *
 * @param key  bytes to hash (not required to be NUL-terminated)
 * @param len  length of key in bytes
 * @param m    pool size; must be <= furc_maximum_pool_size()
 * @return index in [0, m); 0 is also the give-up fallback value
 */
uint32_t furc_hash_internal(const char* const key, const size_t len, const uint32_t m) {
  uint32_t tries;
  uint32_t d;
  uint32_t num;
  uint32_t i;
  uint32_t a;
  uint64_t hash[FURC_CACHE_SIZE];
  int32_t old_ord;

  assert(m <= furc_maximum_pool_size());

  /* A pool of size 0 or 1 can only ever map to index 0. */
  if (m <= 1) {
    return 0;
  }

  /* NOTE(review): presumably this initial call resets/seeds the cached hash
     state in `hash`/`old_ord` before any real bits are drawn — confirm
     against furc_get_bit's definition. */
  furc_get_bit(NULL, 0, 0, hash, &old_ord);

  /* d = ceil(log2(m)): smallest depth whose tree has at least m leaves. */
  for (d = 0; m > (1ul << d); d++)
    ;

  a = d;
  for (tries = 0; tries < MAX_TRIES; tries++) {
    /* Walk down from the current depth until a level accepts the key
       (its bit is set).  Reaching depth 0 means index 0 by construction. */
    while (!furc_get_bit(key, len, a, hash, &old_ord)) {
      if (--d == 0) {
        return 0;
      }
      a = d;
    }
    a += FURC_SHIFT;

    /* Assemble the remaining d-1 bits of the candidate leaf index, most
       significant bit (the accepted level's implicit 1) first. */
    num = 1;
    for (i = 0; i < d-1; i++) {
      num = (num << 1) | furc_get_bit(key, len, a, hash, &old_ord);
      a += FURC_SHIFT;
    }

    /* Candidate may exceed the pool when m is not a power of two; if so,
       draw again. */
    if (num < m) {
      return num;
    }
  }

  // Give up; return 0, which is a legal value in all cases.
  return 0;
}
/**
 * This verifies that
 * 1) the load is evenly balanced across servers.
 * 2) the act of adding a server to a pool will never result in a server
 *    handling keyspace that it previously handled but no longer does.
 *    If this occurs, then stale data may be returned.
 */
TEST(ch3, verify_correctness) {
  uint32_t i, j;
  uint32_t maximum_pool_size = furc_maximum_pool_size();
  char key[MAX_KEY_LENGTH + 1];
  // pools[p][s] counts how many sampled keys hashed to server s in pool p.
  std::vector<uint64_t> pools[NUM_POOLS];
  uint32_t sizes[NUM_POOLS];
  size_t num_pools;

  // Uniform weights of 1.0: weighted furc must then agree with plain furc.
  auto weights = std::make_unique<std::array<double, 1U << 23U>>();
  weights->fill(1.0);

  srand(time(nullptr));

  // Build a sequence of strictly growing pool sizes, alternating geometric
  // and arithmetic growth, ending exactly at the maximum pool size.
  for (num_pools = 0; /* see end of loop */; ++num_pools) {
    if (num_pools == 0) {
      sizes[num_pools] = 1;
    } else if (num_pools == NUM_POOLS - 1) {
      sizes[num_pools] = maximum_pool_size;
    } else if (num_pools % 2 == 1) {
      // grow pool size geometrically
      sizes[num_pools] = sizes[num_pools - 1] * drand_in_range(1.5, 2.5);
    } else {
      // grow pool size arithmetically
      sizes[num_pools] = sizes[num_pools - 1] + rand_in_range(1, 11);
    }

    /* Make sure we don't exceed the maximum pool size. */
    if (sizes[num_pools] > maximum_pool_size) {
      sizes[num_pools] = maximum_pool_size;
    }

    pools[num_pools] = std::vector<uint64_t>(sizes[num_pools]);

    if (sizes[num_pools] == maximum_pool_size)
      break;
  }

  for (i = 0; i < NUM_SAMPLES; ++i) {
    // Sentinel: -1 wraps to SIZE_MAX, which can never equal a valid index.
    size_t previous_num = -1;
    int len;

    make_random_key(key, MAX_KEY_LENGTH);
    len = strlen(key);

    // hash the same key in each pool, in increasing pool size order
    for (j = 0; j < num_pools; ++j) {
      size_t num = furc_hash(key, len, sizes[j]);
      EXPECT_LT(num, sizes[j]);

      // Verify that the weighted furc yields identical result with weights at 1
      assert(sizes[j] <= weights->size());
      folly::Range<const double*> weightRange(
          weights->cbegin(), weights->cbegin() + sizes[j]);
      size_t weighted = facebook::mcrouter::weightedFurcHash(
          folly::StringPiece(key, len), weightRange);
      EXPECT_EQ(num, weighted);

      ++pools[j][num];

      // make sure that this key either hashes the same server,
      // or hashes to a new server
      if (previous_num != num && j > 0) {
        EXPECT_GE(num, sizes[j - 1]);
      }
      previous_num = num;
    }
  }

  for (i = 0; i < num_pools; ++i) {
    /* Verify that load is evenly distributed. This isn't easy to do
       generally without significantly increasing the runtime by choosing
       a huge NUM_SAMPLES, so just check pools up to 1000 in size. */
    uint32_t pool_size = sizes[i];

    // Sizes are non-decreasing, so every later pool is also too big: stop.
    if (pool_size > 1000)
      break;

    double expected_mean = ((double)NUM_SAMPLES) / pool_size;

    double max_diff = 0;
    double sum = 0;
    for (j = 0; j < pool_size; j++) {
      double diff = std::abs(pools[i][j] - expected_mean);
      if (diff > max_diff)
        max_diff = diff;
      sum += pools[i][j];
    }
    double mean = sum / pool_size;

    // expect the sample mean to be within 5% of expected mean
    EXPECT_NEAR(mean, expected_mean, expected_mean * 0.05);

    // expect the maximum deviation from mean to be within 15%
    EXPECT_NEAR(max_diff, 0, mean * 0.15);

    sum = 0;
    for (j = 0; j < pool_size; j++) {
      double diff = pools[i][j] - mean;
      sum += diff * diff;
    }
    double stddev = sqrt(sum / pool_size);

    // expect the standard deviation to be < 5%
    EXPECT_NEAR(stddev, 0, mean * 0.05);
  }
}