Пример #1
0
size_t WeightedCh3HashFunc::operator()(folly::StringPiece key) const {
  auto n = weights_.size();
  checkLogic(n && n <= furc_maximum_pool_size(), "Invalid pool size: {}", n);
  size_t salt = 0;
  size_t index = 0;
  std::string saltedKey;
  auto originalKey = key;
  for (size_t i = 0; i < kNumTries; ++i) {
    index = furc_hash(key.data(), key.size(), n);

    /* Use 32-bit hash, but store in 64-bit ints so that
       we don't have to deal with overflows */
    uint64_t p = folly::hash::SpookyHashV2::Hash32(key.data(), key.size(),
                                                   kHashSeed);
    assert(0 <= weights_[index] && weights_[index] <= 1.0);
    uint64_t w = weights_[index] * std::numeric_limits<uint32_t>::max();

    /* Rehash only if p is out of range */
    if (LIKELY(p < w)) {
      return index;
    }

    /* Change the key to rehash */
    auto s = salt++;
    saltedKey = originalKey.str();
    do {
      saltedKey.push_back(char(s % 10) + '0');
      s /= 10;
    } while (s > 0);

    key = saltedKey;
  }

  return index;
}
Пример #2
0
uint32_t furc_hash_internal(const char* const key, const size_t len, const uint32_t m) {
    uint32_t tries;
    uint32_t d;
    uint32_t num;
    uint32_t i;
    uint32_t a;
    uint64_t hash[FURC_CACHE_SIZE];
    int32_t old_ord;

    assert(m <= furc_maximum_pool_size());

    if (m <= 1) {
        return 0;
    }

    furc_get_bit(NULL, 0, 0, hash, &old_ord);
    for (d = 0; m > (1ul << d); d++)
        ;

    a = d;
    for (tries = 0; tries < MAX_TRIES; tries++) {
        while (!furc_get_bit(key, len, a, hash, &old_ord)) {
            if (--d == 0) {
                return 0;
            }
            a = d;
        }
        a += FURC_SHIFT;
        num = 1;
        for (i = 0; i < d-1; i++) {
            num = (num << 1) | furc_get_bit(key, len, a, hash, &old_ord);
            a += FURC_SHIFT;
        }
        if (num < m) {
            return num;
        }
    }

    // Give up; return 0, which is a legal value in all cases.
    return 0;
}
Пример #3
0
/**
 * This verifies that
 *   1) the load is evenly balanced across servers.
 *   2) the act of adding a server to a pool will never result in a server
 *      handling keyspace that it previously handled but no longer does.
 *      If this occurs, then stale data may be returned.
 */
TEST(ch3, verify_correctness) {
  uint32_t i, j;
  uint32_t maximum_pool_size = furc_maximum_pool_size();
  char key[MAX_KEY_LENGTH + 1];
  std::vector<uint64_t> pools[NUM_POOLS];
  uint32_t sizes[NUM_POOLS];
  size_t num_pools;
  auto weights = std::make_unique<std::array<double, 1U << 23U>>();
  weights->fill(1.0);

  srand(time(nullptr));

  for (num_pools = 0; /* see end of loop */; ++num_pools) {
    if (num_pools == 0) {
      sizes[num_pools] = 1;
    } else if (num_pools == NUM_POOLS - 1) {
      sizes[num_pools] = maximum_pool_size;
    } else if (num_pools % 2 == 1) { // grow pool size geometrically
      sizes[num_pools] = sizes[num_pools - 1] * drand_in_range(1.5, 2.5);
    } else { // grow pool size arithmetically
      sizes[num_pools] = sizes[num_pools - 1] + rand_in_range(1, 11);
    }

    /* Make sure we don't exceed the maximum pool size. */
    if (sizes[num_pools] > maximum_pool_size) {
      sizes[num_pools] = maximum_pool_size;
    }

    pools[num_pools] = std::vector<uint64_t>(sizes[num_pools]);

    if (sizes[num_pools] == maximum_pool_size)
      break;
  }

  for (i = 0; i < NUM_SAMPLES; ++i) {
    size_t previous_num = -1;
    int len;

    make_random_key(key, MAX_KEY_LENGTH);
    len = strlen(key);

    // hash the same key in each pool, in increasing pool size order
    for (j = 0; j < num_pools; ++j) {
      size_t num = furc_hash(key, len, sizes[j]);
      EXPECT_LT(num, sizes[j]);

      // Verify that the weighted furc yields identical result with weights at 1
      assert(sizes[j] <= weights->size());
      folly::Range<const double*> weightRange(
          weights->cbegin(), weights->cbegin() + sizes[j]);
      size_t weighted = facebook::mcrouter::weightedFurcHash(
          folly::StringPiece(key, len), weightRange);
      EXPECT_EQ(num, weighted);

      ++pools[j][num];

      // make sure that this key either hashes the same server,
      // or hashes to a new server
      if (previous_num != num && j > 0) {
        EXPECT_GE(num, sizes[j - 1]);
      }

      previous_num = num;
    }
  }

  for (i = 0; i < num_pools; ++i) {
    /* Verify that load is evenly distributed. This isn't easy to do
       generally without significantly increasing the runtime by choosing
       a huge NUM_SAMPLES, so just check pools up to 1000 in size. */

    uint32_t pool_size = sizes[i];
    if (pool_size > 1000)
      break;
    double expected_mean = ((double)NUM_SAMPLES) / pool_size;

    double max_diff = 0;
    double sum = 0;
    for (j = 0; j < pool_size; j++) {
      double diff = std::abs(pools[i][j] - expected_mean);
      if (diff > max_diff)
        max_diff = diff;
      sum += pools[i][j];
    }
    double mean = sum / pool_size;
    // expect the sample mean to be within 5% of expected mean
    EXPECT_NEAR(mean, expected_mean, expected_mean * 0.05);

    // expect the maximum deviation from mean to be within 15%
    EXPECT_NEAR(max_diff, 0, mean * 0.15);

    sum = 0;
    for (j = 0; j < pool_size; j++) {
      double diff = pools[i][j] - mean;
      sum += diff * diff;
    }
    double stddev = sqrt(sum / pool_size);
    // expect the standard deviation to be < 5%
    EXPECT_NEAR(stddev, 0, mean * 0.05);
  }
}