/// Returns the vbucket for a specified key int BaseMemcachedStore::vbucket_for_key(const std::string& key) { // Hash the key and convert the hash to a vbucket. int hash = memcached_generate_hash_value(key.data(), key.length(), MEMCACHED_HASH_MD5); int vbucket = hash & (_vbuckets - 1); TRC_DEBUG("Key %s hashes to vbucket %d via hash 0x%x", key.c_str(), vbucket, hash); return vbucket; }
/*
  Rebuilds the consistent-hashing continuum held in ptr->continuum from the
  server list in ptr->hosts.

  - With MEMCACHED_BEHAVIOR_AUTO_EJECT_HOSTS enabled, servers whose next_retry
    is still in the future are left out of the continuum, and
    ptr->next_distribution_rebuild is set to the earliest such next_retry
    (0 when no server is waiting for a retry).
  - With MEMCACHED_BEHAVIOR_KETAMA_WEIGHTED enabled, each live server's share
    of points is proportional to its weight (a weight of 0 is rewritten to 1)
    and four points are derived from every key via ketama_server_hash().
    Otherwise every live server gets MEMCACHED_POINTS_PER_SERVER points, each
    hashed with ptr->hash_continuum.

  Returns:
    MEMCACHED_ERRNO                      gettimeofday() failed; errno is saved
                                         in ptr->cached_errno.
    MEMCACHED_MEMORY_ALLOCATION_FAILURE  the continuum could not be grown, or
                                         a point key did not fit in its
                                         fixed-size buffer.
    MEMCACHED_SUCCESS                    otherwise. When no server is live the
                                         function returns early and leaves the
                                         continuum untouched.
*/
memcached_return update_continuum(memcached_st *ptr)
{
  uint32_t host_index;
  uint32_t continuum_index= 0;
  uint32_t value;
  memcached_server_st *list;
  uint32_t pointer_index;
  uint32_t pointer_counter= 0;
  uint32_t pointer_per_server= MEMCACHED_POINTS_PER_SERVER;
  uint32_t pointer_per_hash= 1;
  uint64_t total_weight= 0;
  uint64_t is_ketama_weighted= 0;
  uint64_t is_auto_ejecting= 0;
  uint32_t points_per_server= 0;
  uint32_t live_servers= 0;
  struct timeval now;

  if (gettimeofday(&now, NULL) != 0)
  {
    ptr->cached_errno = errno;
    return MEMCACHED_ERRNO;
  }

  list = ptr->hosts;

  /* count live servers (those without a retry delay set) */
  is_auto_ejecting= memcached_behavior_get(ptr, MEMCACHED_BEHAVIOR_AUTO_EJECT_HOSTS);
  if (is_auto_ejecting)
  {
    live_servers= 0;
    ptr->next_distribution_rebuild= 0;
    for (host_index= 0; host_index < ptr->number_of_hosts; ++host_index)
    {
      if (list[host_index].next_retry <= now.tv_sec)
        live_servers++;
      else
      {
        /* remember the earliest pending retry among the ejected servers */
        if (ptr->next_distribution_rebuild == 0 || list[host_index].next_retry < ptr->next_distribution_rebuild)
          ptr->next_distribution_rebuild= list[host_index].next_retry;
      }
    }
  }
  else
    live_servers= ptr->number_of_hosts;

  is_ketama_weighted= memcached_behavior_get(ptr, MEMCACHED_BEHAVIOR_KETAMA_WEIGHTED);
  points_per_server= (uint32_t) (is_ketama_weighted ? MEMCACHED_POINTS_PER_SERVER_KETAMA : MEMCACHED_POINTS_PER_SERVER);

  if (live_servers == 0)
    return MEMCACHED_SUCCESS;

  /* grow the continuum (with MEMCACHED_CONTINUUM_ADDITION servers of slack)
     only when more servers are live than it was last sized for */
  if (live_servers > ptr->continuum_count)
  {
    memcached_continuum_item_st *new_ptr;

    new_ptr= ptr->call_realloc(ptr, ptr->continuum, sizeof(memcached_continuum_item_st) * (live_servers + MEMCACHED_CONTINUUM_ADDITION) * points_per_server);

    if (new_ptr == 0)
      return MEMCACHED_MEMORY_ALLOCATION_FAILURE;

    ptr->continuum= new_ptr;
    ptr->continuum_count= live_servers + MEMCACHED_CONTINUUM_ADDITION;
  }

  /* sum the weights of the servers that will appear on the continuum */
  if (is_ketama_weighted)
  {
    for (host_index = 0; host_index < ptr->number_of_hosts; ++host_index)
    {
      if (list[host_index].weight == 0)
      {
        list[host_index].weight = 1;
      }
      if (!is_auto_ejecting || list[host_index].next_retry <= now.tv_sec)
        total_weight += list[host_index].weight;
    }
  }

  for (host_index = 0; host_index < ptr->number_of_hosts; ++host_index)
  {
    /* ejected servers contribute no points */
    if (is_auto_ejecting && list[host_index].next_retry > now.tv_sec)
      continue;

    if (is_ketama_weighted)
    {
      /* this server's point count is its weight fraction of the total,
         rounded down to a multiple of 4 (four points per hashed key) */
      float pct = (float)list[host_index].weight / (float)total_weight;
      pointer_per_server= (uint32_t) ((floorf((float) (pct * MEMCACHED_POINTS_PER_SERVER_KETAMA / 4 * (float)live_servers + 0.0000000001))) * 4);
      pointer_per_hash= 4;
#ifdef DEBUG
      printf("ketama_weighted:%s|%d|%llu|%u\n", list[host_index].hostname, list[host_index].port, (unsigned long long)list[host_index].weight, pointer_per_server);
#endif
    }

    for (pointer_index= 1; pointer_index <= pointer_per_server / pointer_per_hash; ++pointer_index)
    {
      char sort_host[MEMCACHED_MAX_HOST_SORT_LENGTH]= "";
      int printed;
      size_t sort_host_length;

      /* key is "<host>-<n>" on the default port, "<host>:<port>-<n>" otherwise */
      if (list[host_index].port == MEMCACHED_DEFAULT_PORT)
      {
        printed= snprintf(sort_host, MEMCACHED_MAX_HOST_SORT_LENGTH, "%s-%d", list[host_index].hostname, pointer_index - 1);
      }
      else
      {
        printed= snprintf(sort_host, MEMCACHED_MAX_HOST_SORT_LENGTH, "%s:%d-%d", list[host_index].hostname, list[host_index].port, pointer_index - 1);
      }

      /* snprintf() reports the length it WANTED to write. If the key was
         truncated (or formatting failed), that length is larger than the
         buffer and hashing it would read past the end of sort_host, so bail
         out instead. Points already written for earlier keys stay in
         ptr->continuum, but continuum_points_counter is not updated. */
      if (printed < 0 || (size_t) printed >= (size_t) MEMCACHED_MAX_HOST_SORT_LENGTH)
        return MEMCACHED_MEMORY_ALLOCATION_FAILURE;

      sort_host_length= (size_t) printed;
      WATCHPOINT_ASSERT(sort_host_length);

      if (is_ketama_weighted)
      {
        unsigned int i;
        for (i = 0; i < pointer_per_hash; i++)
        {
          value= ketama_server_hash(sort_host, (uint32_t) sort_host_length, (int) i);
          ptr->continuum[continuum_index].index= host_index;
          ptr->continuum[continuum_index++].value= value;
        }
      }
      else
      {
        value= memcached_generate_hash_value(sort_host, sort_host_length, ptr->hash_continuum);
        ptr->continuum[continuum_index].index= host_index;
        ptr->continuum[continuum_index++].value= value;
      }
    }

    pointer_counter+= pointer_per_server;
  }

  WATCHPOINT_ASSERT(ptr);
  WATCHPOINT_ASSERT(ptr->continuum);
  WATCHPOINT_ASSERT(ptr->number_of_hosts * MEMCACHED_POINTS_PER_SERVER <= MEMCACHED_CONTINUUM_SIZE);

  /* publish the number of points written and order them by hash value */
  ptr->continuum_points_counter= pointer_counter;
  qsort(ptr->continuum, ptr->continuum_points_counter, sizeof(memcached_continuum_item_st), continuum_item_cmp);

#ifdef DEBUG
  for (pointer_index= 0; ptr->number_of_hosts && pointer_index < ((live_servers * MEMCACHED_POINTS_PER_SERVER) - 1); pointer_index++)
  {
    WATCHPOINT_ASSERT(ptr->continuum[pointer_index].value <= ptr->continuum[pointer_index + 1].value);
  }
#endif

  return MEMCACHED_SUCCESS;
}
/// Gets the set of replicas to use for a read or write operation for the /// specified key. const std::vector<memcached_st*>& MemcachedStore::get_replicas(const std::string& key, Op operation) { MemcachedStore::connection* conn = (connection*)pthread_getspecific(_thread_local); if (conn == NULL) { // Create a new connection structure for this thread. conn = new MemcachedStore::connection; pthread_setspecific(_thread_local, conn); conn->view_number = 0; } if (conn->view_number != _view_number) { // Either the view has changed or has not yet been set up, so set up the // connection and replica structures for this thread. for (size_t ii = 0; ii < conn->st.size(); ++ii) { memcached_free(conn->st[ii]); conn->st[ii] = NULL; } pthread_rwlock_rdlock(&_view_lock); LOG_DEBUG("Set up new view %d for thread", _view_number); // Create a set of memcached_st's one per server. conn->st.resize(_servers.size()); for (size_t ii = 0; ii < _servers.size(); ++ii) { // Create a new memcached_st for this server. Do not specify the server // at this point as memcached() does not support IPv6 addresses. LOG_DEBUG("Setting up server %d for connection %p (%s)", ii, conn, _options.c_str()); conn->st[ii] = memcached(_options.c_str(), _options.length()); LOG_DEBUG("Set up connection %p to server %s", conn->st[ii], _servers[ii].c_str()); // Switch to a longer connect timeout from here on. memcached_behavior_set(conn->st[ii], MEMCACHED_BEHAVIOR_CONNECT_TIMEOUT, 50); // Connect to the server. The address is specified as either <IPv4 address>:<port> // or [<IPv6 address>]:<port>. Look for square brackets to determine whether // this is an IPv6 address. std::vector<std::string> contact_details; size_t close_bracket = _servers[ii].find(']'); if (close_bracket == _servers[ii].npos) { // IPv4 connection. Split the string on the colon. 
Utils::split_string(_servers[ii], ':', contact_details); if (contact_details.size() != 2) { LOG_ERROR("Malformed contact details %s", _servers[ii].c_str()); break; } } else { // IPv6 connection. Split the string on ']', which removes any white // space from the start and the end, then remove the '[' from the // start of the IP addreess string and the start of the ';' from the start // of the port string. Utils::split_string(_servers[ii], ']', contact_details); if ((contact_details.size() != 2) || (contact_details[0][0] != '[') || (contact_details[1][0] != ':')) { LOG_ERROR("Malformed contact details %s", _servers[ii].c_str()); break; } contact_details[0].erase(contact_details[0].begin()); contact_details[1].erase(contact_details[1].begin()); } LOG_DEBUG("Setting server to IP address %s port %s", contact_details[0].c_str(), contact_details[1].c_str()); int port = atoi(contact_details[1].c_str()); memcached_server_add(conn->st[ii], contact_details[0].c_str(), port); } conn->read_replicas.resize(_vbuckets); conn->write_replicas.resize(_vbuckets); // Now set up the read and write replica sets. for (int ii = 0; ii < _vbuckets; ++ii) { conn->read_replicas[ii].resize(_read_replicas[ii].size()); for (size_t jj = 0; jj < _read_replicas[ii].size(); ++jj) { conn->read_replicas[ii][jj] = conn->st[_read_replicas[ii][jj]]; } conn->write_replicas[ii].resize(_write_replicas[ii].size()); for (size_t jj = 0; jj < _write_replicas[ii].size(); ++jj) { conn->write_replicas[ii][jj] = conn->st[_write_replicas[ii][jj]]; } } // Flag that we are in sync with the latest view. conn->view_number = _view_number; pthread_rwlock_unlock(&_view_lock); } // Hash the key and convert the hash to a vbucket. int hash = memcached_generate_hash_value(key.data(), key.length(), MEMCACHED_HASH_MD5); int vbucket = hash & (_vbuckets - 1); LOG_DEBUG("Key %s hashes to vbucket %d via hash 0x%x", key.c_str(), vbucket, hash); return (operation == Op::READ) ? 
conn->read_replicas[vbucket] : conn->write_replicas[vbucket]; }