/** Create a new memcached handle
 *
 * Builds a libmemcached handle from the driver's option string, asks the
 * server(s) for their version as a connectivity check, and wraps the result
 * in a talloc'd handle that has _mod_conn_free registered as its destructor.
 *
 * @param ctx to allocate handle in.
 * @param instance data.
 * @return the new handle on success, NULL on any failure.
 */
static void *mod_conn_create(TALLOC_CTX *ctx, void *instance)
{
	rlm_cache_t			*inst = instance;
	rlm_cache_memcached_t		*driver = inst->driver;
	rlm_cache_memcached_handle_t	*mandle;

	memcached_st			*sandle;
	memcached_return_t		ret;

	/* talloc_array_length() counts the terminating '\0', libmemcached wants the string length */
	sandle = memcached(driver->options, talloc_array_length(driver->options) -1);
	if (!sandle) {
		ERROR("rlm_cache_memcached: Failed creating memcached connection");

		return NULL;
	}

	ret = memcached_version(sandle);
	if (ret != MEMCACHED_SUCCESS) {
		ERROR("rlm_cache_memcached: Failed getting server info: %s: %s", memcached_strerror(sandle, ret),
		      memcached_last_error_message(sandle));
		memcached_free(sandle);

		return NULL;
	}

	mandle = talloc_zero(ctx, rlm_cache_memcached_handle_t);
	if (!mandle) {
		/* Without the wrapper nothing would ever release sandle, so free it here */
		ERROR("rlm_cache_memcached: Out of memory allocating connection handle");
		memcached_free(sandle);

		return NULL;
	}
	mandle->handle = sandle;
	talloc_set_destructor(mandle, _mod_conn_free);

	return mandle;
}
/** Store a cache entry in memcached
 *
 * The entry is serialized into a temporary talloc pool, written with
 * memcached_set() under the entry's key, and the pool is released before
 * the result of the write is examined.
 *
 * @param inst main rlm_cache instance (unused).
 * @param request The current request.
 * @param handle Pointer to memcached handle.
 * @param c entry to insert.
 * @return CACHE_OK on success else CACHE_ERROR on error.
 */
static cache_status_t cache_entry_insert(UNUSED rlm_cache_t *inst, REQUEST *request, rlm_cache_handle_t **handle,
					 rlm_cache_entry_t *c)
{
	rlm_cache_memcached_handle_t	*conn = *handle;
	TALLOC_CTX			*scratch;
	char				*serialized;
	char const			*value;
	size_t				value_len;
	memcached_return_t		rcode;

	scratch = talloc_pool(NULL, 1024);
	if (!scratch) return CACHE_ERROR;

	if (cache_serialize(scratch, &serialized, c) < 0) {
		talloc_free(scratch);

		return CACHE_ERROR;
	}

	/* An entry that serializes to nothing is stored as a zero length value */
	if (serialized) {
		value = serialized;
		value_len = talloc_array_length(serialized) - 1;
	} else {
		value = "";
		value_len = 0;
	}

	rcode = memcached_set(conn->handle, c->key, talloc_array_length(c->key) - 1,
			      value, value_len, c->expires, 0);
	talloc_free(scratch);

	if (rcode == MEMCACHED_SUCCESS) return CACHE_OK;

	RERROR("Failed storing entry with key \"%s\": %s: %s", c->key,
	       memcached_strerror(conn->handle, rcode),
	       memcached_last_error_message(conn->handle));

	return CACHE_ERROR;
}
/// Return the locally recorded error text. When a memcached handle exists,
/// "\tErrMsg:" followed by libmemcached's last error message for that handle
/// is appended.
const std::string Memcached::GetErrMsg(void)
{
    std::string sResult = m_sErrMsg;

    // No handle: only the locally recorded text is available.
    if (m_pMemc == NULL)
    {
        return sResult;
    }

    sResult += "\tErrMsg:";
    sResult += memcached_last_error_message(m_pMemc);
    return sResult;
}
/** Delete the cache entry from memcached
 *
 * Issues a memcached_delete() for the entry's key.  Any result other than
 * MEMCACHED_SUCCESS is logged and reported as an error.
 *
 * @param inst main rlm_cache instance (unused).
 * @param request The current request.
 * @param handle Pointer to memcached handle.
 * @param c entry to expire.
 * @return CACHE_OK on success else CACHE_ERROR.
 */
static cache_status_t cache_entry_expire(UNUSED rlm_cache_t *inst, REQUEST *request, rlm_cache_handle_t **handle,
					 rlm_cache_entry_t *c)
{
	rlm_cache_memcached_handle_t	*mandle = *handle;
	memcached_return_t		ret;

	/* talloc_array_length() includes the terminating '\0', which is not part of the key */
	ret = memcached_delete(mandle->handle, c->key, talloc_array_length(c->key) - 1, 0);
	if (ret != MEMCACHED_SUCCESS) {
		/* Report both the return code text and the detailed message, as the other callbacks do */
		RERROR("Failed deleting entry with key \"%s\": %s: %s", c->key,
		       memcached_strerror(mandle->handle, ret),
		       memcached_last_error_message(mandle->handle));

		return CACHE_ERROR;
	}

	return CACHE_OK;
}
/** Locate a cache entry in memcached
 *
 * Fetches the value stored under key, deserializes it into a freshly
 * allocated rlm_cache_entry_t (talloc parent NULL) and stores a copy of the
 * key in the entry.  The buffer returned by memcached_get() is released with
 * free() on every path.
 *
 * @param out Where to write the pointer to the cache entry.
 * @param inst main rlm_cache instance (unused).
 * @param request The current request.
 * @param handle Pointer to memcached handle.
 * @param key to search for.
 * @return CACHE_OK on success CACHE_MISS if no entry found, CACHE_ERROR on error.
 */
static cache_status_t cache_entry_find(rlm_cache_entry_t **out, UNUSED rlm_cache_t *inst, REQUEST *request,
				       rlm_cache_handle_t **handle, char const *key)
{
	rlm_cache_memcached_handle_t	*mandle = *handle;

	memcached_return_t		mret;
	size_t				len;
	int				ret;
	uint32_t			flags;

	char				*from_store;

	rlm_cache_entry_t		*c;

	from_store = memcached_get(mandle->handle, key, strlen(key), &len, &flags, &mret);
	if (!from_store) {
		if (mret == MEMCACHED_NOTFOUND) return CACHE_MISS;

		RERROR("Failed retrieving entry for key \"%s\": %s: %s", key,
		       memcached_strerror(mandle->handle, mret),
		       memcached_last_error_message(mandle->handle));

		return CACHE_ERROR;
	}
	RDEBUG2("Retrieved %zu bytes from memcached", len);
	RDEBUG2("%s", from_store);

	c = talloc_zero(NULL, rlm_cache_entry_t);
	if (!c) {
		/* Don't hand a NULL entry to the deserializer, and don't leak the fetched value */
		free(from_store);
		RERROR("Out of memory allocating cache entry for key \"%s\"", key);

		return CACHE_ERROR;
	}

	ret = cache_deserialize(c, from_store, len);
	free(from_store);
	if (ret < 0) {
		RERROR("%s", fr_strerror());
		talloc_free(c);

		return CACHE_ERROR;
	}

	c->key = talloc_strdup(c, key);
	if (!c->key) {
		/* An entry without a key must not be reported as a successful lookup */
		RERROR("Out of memory copying key \"%s\"", key);
		talloc_free(c);

		return CACHE_ERROR;
	}

	*out = c;

	return CACHE_OK;
}
/// Update the data for the specified namespace and key. Writes the data /// atomically, so if the underlying data has changed since it was last /// read, the update is rejected and this returns Store::Status::CONTENTION. Store::Status MemcachedStore::set_data(const std::string& table, const std::string& key, const std::string& data, uint64_t cas, int expiry, SAS::TrailId trail) { Store::Status status = Store::Status::OK; LOG_DEBUG("Writing %d bytes to table %s key %s, CAS = %ld, expiry = %d", data.length(), table.c_str(), key.c_str(), cas, expiry); // Construct the fully qualified key. std::string fqkey = table + "\\\\" + key; const char* key_ptr = fqkey.data(); const size_t key_len = fqkey.length(); const std::vector<memcached_st*>& replicas = get_replicas(fqkey, Op::WRITE); if (trail != 0) { SAS::Event start(trail, SASEvent::MEMCACHED_SET_START, 0); start.add_var_param(fqkey); start.add_var_param(data); start.add_static_param(cas); start.add_static_param(expiry); SAS::report_event(start); } LOG_DEBUG("%d write replicas for key %s", replicas.size(), fqkey.c_str()); // Calculate the rough expected expiry time. We store this in the flags // as it may be useful in future for read repair function. uint32_t now = time(NULL); uint32_t exptime = now + expiry; // Memcached uses a flexible mechanism for specifying expiration. // - 0 indicates never expire. // - <= MEMCACHED_EXPIRATION_MAXDELTA indicates a relative (delta) time. // - > MEMCACHED_EXPIRATION_MAXDELTA indicates an absolute time. // Absolute time is the only way to force immediate expiry. Unfortunately, // it's not reliable - see https://github.com/Metaswitch/cpp-common/issues/160 // for details. Instead, we use relative time for future times (expiry > 0) // and the earliest absolute time for immediate expiry (expiry == 0). time_t memcached_expiration = (time_t)((expiry > 0) ? expiry : MEMCACHED_EXPIRATION_MAXDELTA + 1); // First try to write the primary data record to the first responding // server. 
memcached_return_t rc = MEMCACHED_ERROR; size_t ii; size_t replica_idx; // If we only have one replica, we should try it twice - // libmemcached won't notice a dropped TCP connection until it tries // to make a request on it, and will fail the request then // reconnect, so the second attempt could still work. size_t attempts = (replicas.size() == 1) ? 2: replicas.size(); for (ii = 0; ii < attempts; ++ii) { if ((replicas.size() == 1) && (ii == 1)) { if (rc != MEMCACHED_CONNECTION_FAILURE) { // This is a legitimate error, not a transient server failure, so we // shouldn't retry. break; } replica_idx = 0; LOG_WARNING("Failed to write to sole memcached replica: retrying once"); } else { replica_idx = ii; } LOG_DEBUG("Attempt conditional write to replica %d (connection %p), CAS = %ld", replica_idx, replicas[replica_idx], cas); if (cas == 0) { // New record, so attempt to add. This will fail if someone else // gets there first. rc = memcached_add(replicas[replica_idx], key_ptr, key_len, data.data(), data.length(), memcached_expiration, exptime); } else { // This is an update to an existing record, so use memcached_cas // to make sure it is atomic. rc = memcached_cas(replicas[replica_idx], key_ptr, key_len, data.data(), data.length(), memcached_expiration, exptime, cas); } if (memcached_success(rc)) { LOG_DEBUG("Conditional write succeeded to replica %d", replica_idx); break; } else { LOG_DEBUG("memcached_%s command for %s failed on replica %d, rc = %d (%s), expiry = %d\n%s", (cas == 0) ? 
"add" : "cas", fqkey.c_str(), replica_idx, rc, memcached_strerror(replicas[replica_idx], rc), expiry, memcached_last_error_message(replicas[replica_idx])); if ((rc == MEMCACHED_NOTSTORED) || (rc == MEMCACHED_DATA_EXISTS)) { if (trail != 0) { SAS::Event err(trail, SASEvent::MEMCACHED_SET_CONTENTION, 0); err.add_var_param(fqkey); SAS::report_event(err); } // A NOT_STORED or EXISTS response indicates a concurrent write failure, // so return this to the application immediately - don't go on to // other replicas. LOG_INFO("Contention writing data for %s to store", fqkey.c_str()); status = Store::Status::DATA_CONTENTION; break; } } } if ((rc == MEMCACHED_SUCCESS) && (replica_idx < replicas.size())) { // Write has succeeded, so write unconditionally (and asynchronously) // to the replicas. for (size_t jj = replica_idx + 1; jj < replicas.size(); ++jj) { LOG_DEBUG("Attempt unconditional write to replica %d", jj); memcached_behavior_set(replicas[jj], MEMCACHED_BEHAVIOR_NOREPLY, 1); memcached_set(replicas[jj], key_ptr, key_len, data.data(), data.length(), memcached_expiration, exptime); memcached_behavior_set(replicas[jj], MEMCACHED_BEHAVIOR_NOREPLY, 0); } } if ((!memcached_success(rc)) && (rc != MEMCACHED_NOTSTORED) && (rc != MEMCACHED_DATA_EXISTS)) { if (trail != 0) { SAS::Event err(trail, SASEvent::MEMCACHED_SET_FAILED, 0); err.add_var_param(fqkey); SAS::report_event(err); } LOG_ERROR("Failed to write data for %s to %d replicas", fqkey.c_str(), replicas.size()); status = Store::Status::ERROR; } return status; }
/// Add a record to a single replica, overwriting any tombstone found there.
///
/// Starts with an ADD.  If the ADD (or a later CAS) is rejected because a
/// record already exists, the record is fetched: if it equals TOMBSTONE the
/// write is retried as a CAS using the tombstone's CAS value; if it is any
/// other data the loop stops and the ADD/CAS return code is returned.
///
/// @param replica               Connection to write to.
/// @param key_ptr               Start of the fully qualified key.
/// @param key_len               Length of the key in bytes.
/// @param vbucket               vbucket for the key (only passed on when
///                              _binary is set, otherwise 0 is used).
/// @param data                  Value to store.
/// @param memcached_expiration  Expiration value passed to memcached.
/// @param flags                 Flags value stored with the record.
/// @param trail                 SAS trail (0 disables SAS reporting).
/// @return                      Return code of the last ADD/CAS attempted.
memcached_return_t BaseMemcachedStore::add_overwriting_tombstone(memcached_st* replica,
                                                                 const char* key_ptr,
                                                                 const size_t key_len,
                                                                 const uint32_t vbucket,
                                                                 const std::string& data,
                                                                 time_t memcached_expiration,
                                                                 uint32_t flags,
                                                                 SAS::TrailId trail)
{
  memcached_return_t rc;
  uint64_t cas = 0;

  // The precision argument for %.*s must be an int, not a size_t.
  TRC_DEBUG("Attempting to add data for key %.*s",
            static_cast<int>(key_len), key_ptr);

  // Convert the key into a std::string (sas-client does not like that
  // key_{ptr,len} are constant).
  const std::string key(key_ptr, key_len);

  while (true)
  {
    if (cas == 0)
    {
      TRC_DEBUG("Attempting memcached ADD command");
      rc = memcached_add_vb(replica,
                            key_ptr,
                            key_len,
                            _binary ? vbucket : 0,
                            data.data(),
                            data.length(),
                            memcached_expiration,
                            flags);
    }
    else
    {
      TRC_DEBUG("Attempting memcached CAS command (cas = %llu)",
                static_cast<unsigned long long>(cas));
      rc = memcached_cas_vb(replica,
                            key_ptr,
                            key_len,
                            _binary ? vbucket : 0,
                            data.data(),
                            data.length(),
                            memcached_expiration,
                            flags,
                            cas);
    }

    if ((rc == MEMCACHED_DATA_EXISTS) ||
        (rc == MEMCACHED_NOTSTORED))
    {
      // A record with this key already exists. If it is a tombstone, we need
      // to overwrite it. Get the record to see what it is.
      memcached_return_t get_rc;
      std::string existing_data;

      TRC_DEBUG("Existing data prevented the ADD/CAS. "
                "Issue GET to see if we need to overwrite a tombstone");
      get_rc = get_from_replica(replica, key_ptr, key_len, existing_data, cas);

      if (memcached_success(get_rc))
      {
        if (existing_data != TOMBSTONE)
        {
          // The existing record is not a tombstone.  We mustn't overwrite
          // this, so break out of the loop and return the original return code
          // from the ADD/CAS.
          TRC_DEBUG("Found real data. Give up");
          break;
        }
        else
        {
          // The existing record IS a tombstone. Go round the loop again to
          // overwrite it. `cas` has been set to the cas of the tombstone.
          TRC_DEBUG("Found a tombstone. Attempt to overwrite");

          if (trail != 0)
          {
            SAS::Event event(trail, SASEvent::MEMCACHED_SET_BLOCKED_BY_TOMBSTONE, 0);
            event.add_var_param(key);
            event.add_static_param(cas);
            SAS::report_event(event);
          }
        }
      }
      else if (get_rc == MEMCACHED_NOTFOUND)
      {
        // The GET returned that there is no record for this key. This can
        // happen if the record has expired. We need to try again (it could
        // have been a tombstone which should not block adds).
        TRC_DEBUG("GET failed with NOT_FOUND");

        // There is no record any more, so the retry must be an ADD.  Discard
        // any CAS value left over from a tombstone seen on an earlier pass;
        // otherwise the retry would be a CAS against a missing record.
        cas = 0;

        if (trail != 0)
        {
          SAS::Event event(trail, SASEvent::MEMCACHED_SET_BLOCKED_BY_EXPIRED, 0);
          event.add_var_param(key);
          SAS::report_event(event);
        }
      }
      else
      {
        // The replica failed. Return the return code from the original ADD/CAS.
        TRC_DEBUG("GET failed, rc = %d (%s)\n%s",
                  get_rc,
                  memcached_strerror(replica, get_rc),
                  memcached_last_error_message(replica));
        break;
      }
    }
    else
    {
      TRC_DEBUG("ADD/CAS returned rc = %d (%s)\n%s",
                rc,
                memcached_strerror(replica, rc),
                memcached_last_error_message(replica));
      break;
    }
  }

  return rc;
}
/// Update the data for the specified namespace and key. Writes the data /// atomically, so if the underlying data has changed since it was last /// read, the update is rejected and this returns Store::Status::CONTENTION. Store::Status BaseMemcachedStore::set_data(const std::string& table, const std::string& key, const std::string& data, uint64_t cas, int expiry, SAS::TrailId trail) { Store::Status status = Store::Status::OK; TRC_DEBUG("Writing %d bytes to table %s key %s, CAS = %ld, expiry = %d", data.length(), table.c_str(), key.c_str(), cas, expiry); // Construct the fully qualified key. std::string fqkey = table + "\\\\" + key; const char* key_ptr = fqkey.data(); const size_t key_len = fqkey.length(); int vbucket = vbucket_for_key(fqkey); const std::vector<memcached_st*>& replicas = get_replicas(vbucket, Op::WRITE); if (trail != 0) { SAS::Event start(trail, SASEvent::MEMCACHED_SET_START, 0); start.add_var_param(fqkey); start.add_var_param(data); start.add_static_param(cas); start.add_static_param(expiry); SAS::report_event(start); } TRC_DEBUG("%d write replicas for key %s", replicas.size(), fqkey.c_str()); // Calculate a timestamp (least-significant 32 bits of milliseconds since the // epoch) for the current time. We store this in the flags field to allow us // to resolve conflicts when resynchronizing between memcached servers. struct timespec ts; (void)clock_gettime(CLOCK_REALTIME, &ts); uint32_t flags = (uint32_t)((ts.tv_sec * 1000) + (ts.tv_nsec / 1000000)); // Memcached uses a flexible mechanism for specifying expiration. // - 0 indicates never expire. // - <= MEMCACHED_EXPIRATION_MAXDELTA indicates a relative (delta) time. // - > MEMCACHED_EXPIRATION_MAXDELTA indicates an absolute time. // Absolute time is the only way to force immediate expiry. Unfortunately, // it's not reliable - see https://github.com/Metaswitch/cpp-common/issues/160 // for details. 
Instead, we use relative time for future times (expiry > 0) // and the earliest absolute time for immediate expiry (expiry == 0). time_t memcached_expiration = (time_t)((expiry > 0) ? expiry : MEMCACHED_EXPIRATION_MAXDELTA + 1); // First try to write the primary data record to the first responding // server. memcached_return_t rc = MEMCACHED_ERROR; size_t ii; size_t replica_idx; // If we only have one replica, we should try it twice - // libmemcached won't notice a dropped TCP connection until it tries // to make a request on it, and will fail the request then // reconnect, so the second attempt could still work. size_t attempts = (replicas.size() == 1) ? 2: replicas.size(); for (ii = 0; ii < attempts; ++ii) { if ((replicas.size() == 1) && (ii == 1)) { if (rc != MEMCACHED_CONNECTION_FAILURE) { // This is a legitimate error, not a transient server failure, so we // shouldn't retry. break; } replica_idx = 0; TRC_WARNING("Failed to write to sole memcached replica: retrying once"); } else { replica_idx = ii; } TRC_DEBUG("Attempt conditional write to vbucket %d on replica %d (connection %p), CAS = %ld, expiry = %d", vbucket, replica_idx, replicas[replica_idx], cas, expiry); if (cas == 0) { // New record, so attempt to add (but overwrite any tombstones we // encounter). This will fail if someone else got there first and some // data already exists in memcached for this key. rc = add_overwriting_tombstone(replicas[replica_idx], key_ptr, key_len, vbucket, data, memcached_expiration, flags, trail); } else { // This is an update to an existing record, so use memcached_cas // to make sure it is atomic. rc = memcached_cas_vb(replicas[replica_idx], key_ptr, key_len, _binary ? 
vbucket : 0, data.data(), data.length(), memcached_expiration, flags, cas); if (!memcached_success(rc)) { TRC_DEBUG("memcached_cas command failed, rc = %d (%s)\n%s", rc, memcached_strerror(replicas[replica_idx], rc), memcached_last_error_message(replicas[replica_idx])); } } if (memcached_success(rc)) { TRC_DEBUG("Conditional write succeeded to replica %d", replica_idx); break; } else if ((rc == MEMCACHED_NOTSTORED) || (rc == MEMCACHED_DATA_EXISTS)) { if (trail != 0) { SAS::Event err(trail, SASEvent::MEMCACHED_SET_CONTENTION, 0); err.add_var_param(fqkey); SAS::report_event(err); } // A NOT_STORED or EXISTS response indicates a concurrent write failure, // so return this to the application immediately - don't go on to // other replicas. TRC_INFO("Contention writing data for %s to store", fqkey.c_str()); status = Store::Status::DATA_CONTENTION; break; } } if ((rc == MEMCACHED_SUCCESS) && (replica_idx < replicas.size())) { // Write has succeeded, so write unconditionally (and asynchronously) // to the replicas. for (size_t jj = replica_idx + 1; jj < replicas.size(); ++jj) { TRC_DEBUG("Attempt unconditional write to replica %d", jj); memcached_behavior_set(replicas[jj], MEMCACHED_BEHAVIOR_NOREPLY, 1); memcached_set_vb(replicas[jj], key_ptr, key_len, _binary ? vbucket : 0, data.data(), data.length(), memcached_expiration, flags); memcached_behavior_set(replicas[jj], MEMCACHED_BEHAVIOR_NOREPLY, 0); } } if ((!memcached_success(rc)) && (rc != MEMCACHED_NOTSTORED) && (rc != MEMCACHED_DATA_EXISTS)) { if (trail != 0) { SAS::Event err(trail, SASEvent::MEMCACHED_SET_FAILED, 0); err.add_var_param(fqkey); SAS::report_event(err); } update_vbucket_comm_state(vbucket, FAILED); if (_comm_monitor) { _comm_monitor->inform_failure(); } TRC_ERROR("Failed to write data for %s to %d replicas", fqkey.c_str(), replicas.size()); status = Store::Status::ERROR; } else { update_vbucket_comm_state(vbucket, OK); if (_comm_monitor) { _comm_monitor->inform_success(); } } return status; }