/**
 * Picks an index in [0, weights_.size()) for `key`.
 *
 * Each attempt computes a candidate index with furc_hash and a 32-bit
 * SpookyHashV2 value of the current key. The candidate is accepted when the
 * hash value falls below weights_[index] scaled to the 32-bit range.
 * Otherwise the original key is re-salted with the decimal digits of the
 * attempt number and the procedure is repeated, up to kNumTries attempts.
 * If no attempt is accepted, the last candidate index is returned.
 */
size_t WeightedCh3HashFunc::operator()(folly::StringPiece key) const {
  const auto poolSize = weights_.size();
  checkLogic(
      poolSize && poolSize <= furc_maximum_pool_size(),
      "Invalid pool size: {}",
      poolSize);

  const auto unsaltedKey = key;
  std::string rehashKey;
  size_t candidate = 0;

  for (size_t attempt = 0; attempt < kNumTries; ++attempt) {
    candidate = furc_hash(key.data(), key.size(), poolSize);

    /* The probe is a 32-bit hash held in a 64-bit integer so that the
       comparison against the scaled weight cannot overflow. */
    const uint64_t probe = folly::hash::SpookyHashV2::Hash32(
        key.data(), key.size(), kHashSeed);
    assert(0 <= weights_[candidate] && weights_[candidate] <= 1.0);
    const uint64_t threshold =
        weights_[candidate] * std::numeric_limits<uint32_t>::max();

    /* Accept the candidate when the probe lands inside its weight range. */
    if (LIKELY(probe < threshold)) {
      return candidate;
    }

    /* Rejected: build the next key as the original key followed by the
       attempt number's decimal digits, least significant digit first. */
    rehashKey = unsaltedKey.str();
    size_t remaining = attempt;
    do {
      rehashKey.push_back(static_cast<char>('0' + remaining % 10));
      remaining /= 10;
    } while (remaining > 0);

    key = rehashKey;
  }

  return candidate;
}
Beispiel #2
0
// With every weight set to 0.75, the weighted hash should agree with the
// classic furc hash for roughly three quarters of the keys.
TEST(ch3, weighted_furc_hash_all_75pct) {
  srand(1234567);

  auto weights = std::make_unique<std::array<double, 10000>>();
  weights->fill(0.75);

  char key[MAX_KEY_LENGTH + 1];
  size_t matches = 0;

  uint32_t poolSize = 5000;
  while (poolSize <= 10000) {
    make_random_key(key, MAX_KEY_LENGTH);
    const int keyLen = strlen(key);

    const size_t plain = furc_hash(key, keyLen, poolSize);
    EXPECT_LT(plain, poolSize);

    // Only the first poolSize weights take part in this lookup.
    const folly::Range<const double*> activeWeights(
        weights->cbegin(), weights->cbegin() + poolSize);
    const size_t weighted = facebook::mcrouter::weightedFurcHash(
        folly::StringPiece(key, keyLen), activeWeights);
    EXPECT_LT(weighted, poolSize);

    matches += (plain == weighted) ? 1 : 0;
    ++poolSize;
  }

  // Empirically for the seed, it's 3723, which is roughly 75% of 5000, as
  // expected.
  EXPECT_EQ(3723, matches);
}
Beispiel #3
0
/**
 * Prints a rough wall-clock comparison of NUM_LOOKUPS lookups using
 * crc32_hash + inconsistent_hashing_lookup versus furc_hash, both against
 * NUM_SERVERS servers. The only assertion is that every furc_hash result is
 * below NUM_SERVERS; the timings themselves are informational.
 */
TEST(ch3, timing) {
  // Every key occupies a fixed-width, NUL-terminated slot in one flat buffer.
  const size_t keyStride = MAX_KEY_LENGTH + 1;
  std::vector<char> keys(keyStride * NUM_LOOKUPS);

  // Converts a timeval into microseconds.
  const auto toMicros = [](const struct timeval& tv) -> uint64_t {
    return static_cast<uint64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
  };

  // Prints total and per-query time. The elapsed value is a uint64_t, so it
  // is cast and printed with %llu rather than the mismatched %zd.
  const auto report = [](uint64_t elapsedUs) {
    printf(
        "Lookup:\t\t\t%lluus total\t%0.3fus/query\n",
        static_cast<unsigned long long>(elapsedUs),
        static_cast<float>(elapsedUs) / NUM_LOOKUPS);
  };

  struct timeval lstart, lend;

  printf("Servers:\t\t%d\n", NUM_SERVERS);
  printf("Lookups:\t\t%d\n", NUM_LOOKUPS);

  printf("Generating lookup keys...");
  fflush(stdout);

  srand(time(nullptr));
  for (size_t i = 0; i < NUM_LOOKUPS; ++i) {
    make_random_key(keys.data() + i * keyStride, MAX_KEY_LENGTH);
  }
  printf(" done\n");

  printf("Starting INconsistent hashing timing tests...");
  fflush(stdout);

  gettimeofday(&lstart, nullptr);
  for (size_t i = 0; i < NUM_LOOKUPS; ++i) {
    const char* keyPtr = keys.data() + i * keyStride;
    uint32_t hash_code = crc32_hash(keyPtr, strlen(keyPtr));
    uint32_t server_num = inconsistent_hashing_lookup(hash_code, NUM_SERVERS);

    (void)server_num; /* to avoid compiler warning */
  }
  gettimeofday(&lend, nullptr);
  printf(" done\n");

  report(toMicros(lend) - toMicros(lstart));

  printf("Starting consistent hashing timing tests...");
  fflush(stdout);

  gettimeofday(&lstart, nullptr);
  for (size_t i = 0; i < NUM_LOOKUPS; ++i) {
    const char* keyPtr = keys.data() + i * keyStride;
    auto res = furc_hash(keyPtr, strlen(keyPtr), NUM_SERVERS);
    EXPECT_LT(res, NUM_SERVERS);
  }
  gettimeofday(&lend, nullptr);
  printf(" done\n");

  report(toMicros(lend) - toMicros(lstart));
}
Beispiel #4
0
// With every weight set to 1.0, the weighted hash must return exactly the
// same index as the classic furc hash.
TEST(ch3, weighted_furc_hash_all_one) {
  srand(12345);

  std::array<double, 1000> weights;
  weights.fill(1.0);

  char key[MAX_KEY_LENGTH + 1];

  uint32_t poolSize = 1;
  while (poolSize <= 1000) {
    make_random_key(key, MAX_KEY_LENGTH);
    const int keyLen = strlen(key);

    const size_t plain = furc_hash(key, keyLen, poolSize);
    EXPECT_LT(plain, poolSize);

    // Only the first poolSize weights take part in this lookup.
    const folly::Range<const double*> activeWeights(
        weights.cbegin(), weights.cbegin() + poolSize);
    const size_t weighted = facebook::mcrouter::weightedFurcHash(
        folly::StringPiece(key, keyLen), activeWeights);
    EXPECT_EQ(plain, weighted);

    ++poolSize;
  }
}
Beispiel #5
0
// Hashes the first `len` bytes of `key` with furc_hash over `nPart`
// partitions. `len` is clamped to the range [0, key.size()] first.
int64_t f_furchash_hphp_ext(CStrRef key, int len, int nPart) {
  const int keyLen = key.size();
  if (len > keyLen) {
    len = keyLen;
  }
  if (len < 0) {
    len = 0;
  }
  return furc_hash(key, len, nPart);
}
Beispiel #6
0
/**
 * This verifies that
 *   1) the load is evenly balanced across servers.
 *   2) the act of adding a server to a pool will never result in a server
 *      handling keyspace that it previously handled but no longer does.
 *      If this occurs, then stale data may be returned.
 *
 * It also checks that weightedFurcHash with all weights at 1.0 returns the
 * same index as furc_hash for every key and pool size.
 */
TEST(ch3, verify_correctness) {
  uint32_t i, j;
  uint32_t maximum_pool_size = furc_maximum_pool_size();
  char key[MAX_KEY_LENGTH + 1];
  std::vector<uint64_t> pools[NUM_POOLS];
  uint32_t sizes[NUM_POOLS];
  // Number of pools actually built; this is a count, not the last index.
  size_t num_pools = 0;
  auto weights = std::make_unique<std::array<double, 1U << 23U>>();
  weights->fill(1.0);

  srand(time(nullptr));

  // Build pools of strictly growing size, ending with one pool of exactly
  // maximum_pool_size. The NUM_POOLS - 1 branch bounds the array index.
  for (;;) {
    const size_t cur = num_pools;
    if (cur == 0) {
      sizes[cur] = 1;
    } else if (cur == NUM_POOLS - 1) {
      sizes[cur] = maximum_pool_size;
    } else if (cur % 2 == 1) { // grow pool size geometrically
      sizes[cur] = sizes[cur - 1] * drand_in_range(1.5, 2.5);
    } else { // grow pool size arithmetically
      sizes[cur] = sizes[cur - 1] + rand_in_range(1, 11);
    }

    /* Make sure we don't exceed the maximum pool size. */
    if (sizes[cur] > maximum_pool_size) {
      sizes[cur] = maximum_pool_size;
    }

    pools[cur] = std::vector<uint64_t>(sizes[cur]);

    // Count this pool before leaving the loop, so that the maximum-size pool
    // is included in the checks below rather than silently skipped.
    ++num_pools;

    if (sizes[cur] == maximum_pool_size)
      break;
  }

  for (i = 0; i < NUM_SAMPLES; ++i) {
    size_t previous_num = -1;
    int len;

    make_random_key(key, MAX_KEY_LENGTH);
    len = strlen(key);

    // hash the same key in each pool, in increasing pool size order
    for (j = 0; j < num_pools; ++j) {
      size_t num = furc_hash(key, len, sizes[j]);
      EXPECT_LT(num, sizes[j]);

      // Verify that the weighted furc yields identical result with weights at 1
      assert(sizes[j] <= weights->size());
      folly::Range<const double*> weightRange(
          weights->cbegin(), weights->cbegin() + sizes[j]);
      size_t weighted = facebook::mcrouter::weightedFurcHash(
          folly::StringPiece(key, len), weightRange);
      EXPECT_EQ(num, weighted);

      // Tally how many keys landed on each server of this pool.
      ++pools[j][num];

      // make sure that this key either hashes the same server,
      // or hashes to a new server
      if (previous_num != num && j > 0) {
        EXPECT_GE(num, sizes[j - 1]);
      }

      previous_num = num;
    }
  }

  for (i = 0; i < num_pools; ++i) {
    /* Verify that load is evenly distributed. This isn't easy to do
       generally without significantly increasing the runtime by choosing
       a huge NUM_SAMPLES, so just check pools up to 1000 in size. */

    uint32_t pool_size = sizes[i];
    if (pool_size > 1000)
      break;
    double expected_mean = static_cast<double>(NUM_SAMPLES) / pool_size;

    double max_diff = 0;
    double sum = 0;
    for (j = 0; j < pool_size; j++) {
      double diff = std::abs(pools[i][j] - expected_mean);
      if (diff > max_diff)
        max_diff = diff;
      sum += pools[i][j];
    }
    double mean = sum / pool_size;
    // expect the sample mean to be within 5% of expected mean
    EXPECT_NEAR(mean, expected_mean, expected_mean * 0.05);

    // expect the maximum deviation from mean to be within 15%
    EXPECT_NEAR(max_diff, 0, mean * 0.15);

    // Second pass: population standard deviation around the sample mean.
    sum = 0;
    for (j = 0; j < pool_size; j++) {
      double diff = pools[i][j] - mean;
      sum += diff * diff;
    }
    double stddev = sqrt(sum / pool_size);
    // expect the standard deviation to be < 5%
    EXPECT_NEAR(stddev, 0, mean * 0.05);
  }
}
Beispiel #7
0
// Hashes the first `len` bytes of `key` with furc_hash over `nPart`
// partitions. `len` is clamped to the range [0, key.size()] first.
int64_t HHVM_FUNCTION(furchash_hphp_ext, const String& key,
                                         int64_t len, int64_t nPart) {
  const int64_t keyLen = key.size();
  if (len > keyLen) {
    len = keyLen;
  }
  if (len < 0) {
    len = 0;
  }
  return furc_hash(key.data(), len, nPart);
}