Exemplo n.º 1
0
void BaseMemcachedStore::delete_without_tombstone(const std::string& fqkey,
                                                  const std::vector<memcached_st*>& replicas,
                                                  SAS::TrailId trail)
{
  if (trail != 0)
  {
    SAS::Event event(trail, SASEvent::MEMCACHED_DELETE, 0);
    event.add_var_param(fqkey);
    SAS::report_event(event);
  }

  const char* key_ptr = fqkey.data();
  const size_t key_len = fqkey.length();

  for (size_t ii = 0; ii < replicas.size(); ++ii)
  {
    TRC_DEBUG("Attempt delete to replica %d (connection %p)",
              ii,
              replicas[ii]);

    memcached_return_t rc = memcached_delete(replicas[ii],
                                             key_ptr,
                                             key_len,
                                             0);

    if (!memcached_success(rc))
    {
      log_delete_failure(fqkey, ii, replicas.size(), trail, 0);
    }
  }
}
Exemplo n.º 2
0
  /// Retrieve the AoR data for a given SIP URI, creating it if there isn't
  /// any already, and returning NULL if we can't get a connection.
  AoR* MemcachedStore::get_aor_data(const std::string& aor_id)
                                    ///< the SIP URI
  {
    memcached_return_t rc;
    MemcachedAoR* aor_data = NULL;

    // Try to get a connection
    struct timespec wait_time;
    wait_time.tv_sec = 0;
    wait_time.tv_nsec = 100 * 1000 * 1000;
    memcached_st* st = memcached_pool_fetch(_pool, &wait_time, &rc);

    if (st != NULL)
    {
      // Got one: use it.
      const char* key_ptr = aor_id.data();
      const size_t key_len = aor_id.length();
      rc = memcached_mget(st, &key_ptr, &key_len, 1);
      if (memcached_success(rc))
      {
        memcached_result_st result;
        memcached_result_create(st, &result);
        memcached_fetch_result(st, &result, &rc);

        if (memcached_success(rc))
        {
          aor_data = deserialize_aor(std::string(memcached_result_value(&result), memcached_result_length(&result)));
          aor_data->set_cas(memcached_result_cas(&result));
          int now = time(NULL);
          expire_bindings(aor_data, now);
        }
        else
        {
          // AoR does not exist, so create it.
          aor_data = new MemcachedAoR();
        }
      }
      memcached_pool_release(_pool, st);
    }

    return (AoR*)aor_data;
  }
Exemplo n.º 3
0
memcached_return_t BaseMemcachedStore::get_from_replica(memcached_st* replica,
                                                        const char* key_ptr,
                                                        const size_t key_len,
                                                        std::string& data,
                                                        uint64_t& cas)
{
  memcached_return_t rc = MEMCACHED_ERROR;
  cas = 0;

  // We must use memcached_mget because memcached_get does not retrieve CAS
  // values.
  rc = memcached_mget(replica, &key_ptr, &key_len, 1);

  if (memcached_success(rc))
  {
    // memcached_mget command was successful, so retrieve the result.
    TRC_DEBUG("Fetch result");
    memcached_result_st result;
    memcached_result_create(replica, &result);
    memcached_fetch_result(replica, &result, &rc);

    if (memcached_success(rc))
    {
      // Found a record, so exit the read loop.
      TRC_DEBUG("Found record on replica");

      // Copy the record into a string. std::string::assign copies its
      // arguments when used with a char*, so we can free the result
      // afterwards.
      data.assign(memcached_result_value(&result),
                  memcached_result_length(&result));
      cas = memcached_result_cas(&result);
    }

    memcached_result_free(&result);
  }

  return rc;
}
Exemplo n.º 4
0
/// Delete the data for the specified namespace and key.  Writes the data
/// unconditionally, so CAS is not needed.
Store::Status MemcachedStore::delete_data(const std::string& table,
                                          const std::string& key,
                                          SAS::TrailId trail)
{
  Store::Status status = Store::Status::OK;

  LOG_DEBUG("Deleting key %s from table %s", key.c_str(), table.c_str());

  // Construct the fully qualified key.
  std::string fqkey = table + "\\\\" + key;
  const char* key_ptr = fqkey.data();
  const size_t key_len = fqkey.length();

  // Delete from the read replicas - read replicas are a superset of the write replicas
  const std::vector<memcached_st*>& replicas = get_replicas(fqkey, Op::READ);

  if (trail != 0)
  {
    SAS::Event start(trail, SASEvent::MEMCACHED_DELETE, 0);
    start.add_var_param(fqkey);
    SAS::report_event(start);
  }

  LOG_DEBUG("Deleting from the %d read replicas for key %s", replicas.size(), fqkey.c_str());

  // First try to write the primary data record to the first responding
  // server.
  memcached_return_t rc = MEMCACHED_ERROR;
  size_t ii;
  for (ii = 0; ii < replicas.size(); ++ii)
  {
    LOG_DEBUG("Attempt delete to replica %d (connection %p)",
              ii,
              replicas[ii]);

    rc = memcached_delete(replicas[ii],
                          key_ptr,
                          key_len,
                          0);

    if (!memcached_success(rc))
    {
      // Deletes are unconditional so this should never happen
      LOG_ERROR("Delete failed to replica %d", ii);
    }
  }

  return status;
}
Exemplo n.º 5
0
bool MemcachedSessionManager::load_session(std::string const &sessionid, Session &session)
{
    memcached_return_t rc;
    size_t value_length;
    char *value = memcached_get(memcached_conn,
            sessionid.data(), sessionid.length(),
            &value_length, 0, &rc);
    if (memcached_success(rc))
    {
        std::stringstream valuestream(std::string(value, value_length));
        parse_pairs(valuestream, '\n', session.data());
        uint64_t userid = 0;
        try {
            userid = std::stoi(session.get("userid"));
        } catch (std::exception const &ex)
        {
            LOG_MESSAGE_WARN("Invalid userid in session data");
            return false;
        }
        session.set(User::find(database(), userid), sessionid);
    }
    if (value != NULL) free(value);
    return memcached_success(rc);
}
Exemplo n.º 6
0
void BaseMemcachedStore::delete_with_tombstone(const std::string& fqkey,
                                               const std::vector<memcached_st*>& replicas,
                                               SAS::TrailId trail)
{
  if (trail != 0)
  {
    SAS::Event event(trail, SASEvent::MEMCACHED_DELETE, 0);
    event.add_var_param(fqkey);
    event.add_static_param(_tombstone_lifetime);
    SAS::report_event(event);
  }

  const char* key_ptr = fqkey.data();
  const size_t key_len = fqkey.length();

  // Calculate a timestamp (least-significant 32 bits of milliseconds since the
  // epoch) for the current time.  We store this in the flags field to allow us
  // to resolve conflicts when resynchronizing between memcached servers.
  struct timespec ts;
  (void)clock_gettime(CLOCK_REALTIME, &ts);
  uint32_t flags = (uint32_t)((ts.tv_sec * 1000) + (ts.tv_nsec / 1000000));

  // Calculate the vbucket for this key.
  int vbucket = vbucket_for_key(fqkey);

  for (size_t ii = 0; ii < replicas.size(); ++ii)
  {
    TRC_DEBUG("Attempt write tombstone to replica %d (connection %p)",
              ii,
              replicas[ii]);

    memcached_return_t rc = memcached_set_vb(replicas[ii],
                                             key_ptr,
                                             key_len,
                                             _binary ? vbucket : 0,
                                             TOMBSTONE.data(),
                                             TOMBSTONE.length(),
                                             _tombstone_lifetime,
                                             flags);

    if (!memcached_success(rc))
    {
      log_delete_failure(fqkey, ii, replicas.size(), trail, 1);
    }
  }
}
Exemplo n.º 7
0
  /// Update the data for a particular address of record.  Writes the data
  /// atomically.  If the underlying data has changed since it was last
  /// read, the update is rejected and this returns false; if the update
  /// succeeds, this returns true.
  ///
  /// If a connection cannot be obtained, returns a random boolean based on
  /// data found on the call stack at the point of entry.
  bool MemcachedStore::set_aor_data(const std::string& aor_id,
                                    ///< the SIP URI
                                    AoR* data)
                                    ///< the data to store
  {
    memcached_return_t rc;
    MemcachedAoR* aor_data = (MemcachedAoR*)data;

    // Try to get a connection.
    struct timespec wait_time;
    wait_time.tv_sec = 0;
    wait_time.tv_nsec = 100 * 1000 * 1000;
    memcached_st* st = memcached_pool_fetch(_pool, &wait_time, &rc);

    if (st != NULL)
    {
      // Got one: use it.
      //
      // Expire any old bindings before writing to the server.  In theory,
      // if there are no bindings left we could delete the entry, but this
      // may cause concurrency problems because memcached does not support
      // cas on delete operations.  In this case we do a memcached_cas with
      // an effectively immediate expiry time.
      int now = time(NULL);
      int max_expires = expire_bindings(aor_data, now);
      std::string value = serialize_aor(aor_data);
      if (aor_data->get_cas() == 0)
      {
        // New record, so attempt to add.  This will fail if someone else
        // gets there first.
        rc = memcached_add(st, aor_id.data(), aor_id.length(), value.data(), value.length(), max_expires, 0);
      }
      else
      {
        // This is an update to an existing record, so use memcached_cas
        // to make sure it is atomic.
        rc = memcached_cas(st, aor_id.data(), aor_id.length(), value.data(), value.length(), max_expires, 0, aor_data->get_cas());
      }

      memcached_pool_release(_pool, st);
    }

    return memcached_success(rc);
  }
Exemplo n.º 8
0
/// Update the data for the specified namespace and key.  Writes the data
/// atomically, so if the underlying data has changed since it was last
/// read, the update is rejected and this returns Store::Status::CONTENTION.
Store::Status MemcachedStore::set_data(const std::string& table,
                                       const std::string& key,
                                       const std::string& data,
                                       uint64_t cas,
                                       int expiry,
                                       SAS::TrailId trail)
{
  Store::Status status = Store::Status::OK;

  LOG_DEBUG("Writing %d bytes to table %s key %s, CAS = %ld, expiry = %d",
            data.length(), table.c_str(), key.c_str(), cas, expiry);

  // Construct the fully qualified key.
  std::string fqkey = table + "\\\\" + key;
  const char* key_ptr = fqkey.data();
  const size_t key_len = fqkey.length();

  const std::vector<memcached_st*>& replicas = get_replicas(fqkey, Op::WRITE);

  if (trail != 0)
  {
    SAS::Event start(trail, SASEvent::MEMCACHED_SET_START, 0);
    start.add_var_param(fqkey);
    start.add_var_param(data);
    start.add_static_param(cas);
    start.add_static_param(expiry);
    SAS::report_event(start);
  }

  LOG_DEBUG("%d write replicas for key %s", replicas.size(), fqkey.c_str());

  // Calculate the rough expected expiry time.  We store this in the flags
  // as it may be useful in future for read repair function.
  uint32_t now = time(NULL);
  uint32_t exptime = now + expiry;

  // Memcached uses a flexible mechanism for specifying expiration.
  // - 0 indicates never expire.
  // - <= MEMCACHED_EXPIRATION_MAXDELTA indicates a relative (delta) time.
  // - > MEMCACHED_EXPIRATION_MAXDELTA indicates an absolute time.
  // Absolute time is the only way to force immediate expiry.  Unfortunately,
  // it's not reliable - see https://github.com/Metaswitch/cpp-common/issues/160
  // for details.  Instead, we use relative time for future times (expiry > 0)
  // and the earliest absolute time for immediate expiry (expiry == 0).
  time_t memcached_expiration =
    (time_t)((expiry > 0) ? expiry : MEMCACHED_EXPIRATION_MAXDELTA + 1);

  // First try to write the primary data record to the first responding
  // server.
  memcached_return_t rc = MEMCACHED_ERROR;
  size_t ii;
  size_t replica_idx;
  // If we only have one replica, we should try it twice -
  // libmemcached won't notice a dropped TCP connection until it tries
  // to make a request on it, and will fail the request then
  // reconnect, so the second attempt could still work.
  size_t attempts = (replicas.size() == 1) ? 2: replicas.size();

  for (ii = 0; ii < attempts; ++ii)
  {
    if ((replicas.size() == 1) && (ii == 1)) {
      if (rc != MEMCACHED_CONNECTION_FAILURE)
      {
        // This is a legitimate error, not a transient server failure, so we
        // shouldn't retry.
        break;
      }
      replica_idx = 0;
      LOG_WARNING("Failed to write to sole memcached replica: retrying once");
    }
    else
    {
      replica_idx = ii;
    }

    LOG_DEBUG("Attempt conditional write to replica %d (connection %p), CAS = %ld",
              replica_idx,
              replicas[replica_idx],
              cas);

    if (cas == 0)
    {
      // New record, so attempt to add.  This will fail if someone else
      // gets there first.
      rc = memcached_add(replicas[replica_idx],
                         key_ptr,
                         key_len,
                         data.data(),
                         data.length(),
                         memcached_expiration,
                         exptime);
    }
    else
    {
      // This is an update to an existing record, so use memcached_cas
      // to make sure it is atomic.
      rc = memcached_cas(replicas[replica_idx],
                         key_ptr,
                         key_len,
                         data.data(),
                         data.length(),
                         memcached_expiration,
                         exptime,
                         cas);

    }

    if (memcached_success(rc))
    {
      LOG_DEBUG("Conditional write succeeded to replica %d", replica_idx);
      break;
    }
    else
    {
      LOG_DEBUG("memcached_%s command for %s failed on replica %d, rc = %d (%s), expiry = %d\n%s",
                (cas == 0) ? "add" : "cas",
                fqkey.c_str(),
                replica_idx,
                rc,
                memcached_strerror(replicas[replica_idx], rc),
                expiry,
                memcached_last_error_message(replicas[replica_idx]));

      if ((rc == MEMCACHED_NOTSTORED) ||
          (rc == MEMCACHED_DATA_EXISTS))
      {
        if (trail != 0)
        {
          SAS::Event err(trail, SASEvent::MEMCACHED_SET_CONTENTION, 0);
          err.add_var_param(fqkey);
          SAS::report_event(err);
        }

        // A NOT_STORED or EXISTS response indicates a concurrent write failure,
        // so return this to the application immediately - don't go on to
        // other replicas.
        LOG_INFO("Contention writing data for %s to store", fqkey.c_str());
        status = Store::Status::DATA_CONTENTION;
        break;
      }
    }
  }

  if ((rc == MEMCACHED_SUCCESS) &&
      (replica_idx < replicas.size()))
  {
    // Write has succeeded, so write unconditionally (and asynchronously)
    // to the replicas.
    for (size_t jj = replica_idx + 1; jj < replicas.size(); ++jj)
    {
      LOG_DEBUG("Attempt unconditional write to replica %d", jj);
      memcached_behavior_set(replicas[jj], MEMCACHED_BEHAVIOR_NOREPLY, 1);
      memcached_set(replicas[jj],
                    key_ptr,
                    key_len,
                    data.data(),
                    data.length(),
                    memcached_expiration,
                    exptime);
      memcached_behavior_set(replicas[jj], MEMCACHED_BEHAVIOR_NOREPLY, 0);
    }
  }

  if ((!memcached_success(rc)) &&
      (rc != MEMCACHED_NOTSTORED) &&
      (rc != MEMCACHED_DATA_EXISTS))
  {
    if (trail != 0)
    {
      SAS::Event err(trail, SASEvent::MEMCACHED_SET_FAILED, 0);
      err.add_var_param(fqkey);
      SAS::report_event(err);
    }

    LOG_ERROR("Failed to write data for %s to %d replicas",
              fqkey.c_str(), replicas.size());
    status = Store::Status::ERROR;
  }

  return status;
}
Exemplo n.º 9
0
/// Retrieve the data for a given namespace and key.
Store::Status MemcachedStore::get_data(const std::string& table,
                                       const std::string& key,
                                       std::string& data,
                                       uint64_t& cas,
                                       SAS::TrailId trail)
{
  Store::Status status = Store::Status::OK;


  // Construct the fully qualified key.
  std::string fqkey = table + "\\\\" + key;
  const char* key_ptr = fqkey.data();
  const size_t key_len = fqkey.length();

  const std::vector<memcached_st*>& replicas = get_replicas(fqkey, Op::READ);

  if (trail != 0)
  {
    SAS::Event start(trail, SASEvent::MEMCACHED_GET_START, 0);
    start.add_var_param(fqkey);
    SAS::report_event(start);
  }

  LOG_DEBUG("%d read replicas for key %s", replicas.size(), fqkey.c_str());

  // Read from all replicas until we get a positive result.
  memcached_return_t rc = MEMCACHED_ERROR;
  bool active_not_found = false;
  size_t failed_replicas = 0;
  size_t ii;
  // If we only have one replica, we should try it twice -
  // libmemcached won't notice a dropped TCP connection until it tries
  // to make a request on it, and will fail the request then
  // reconnect, so the second attempt could still work.
  size_t attempts = (replicas.size() == 1) ? 2 : replicas.size();

  for (ii = 0; ii < attempts; ++ii)
  {
    size_t replica_idx;
    if ((replicas.size() == 1) && (ii == 1))
    {
      if (rc != MEMCACHED_CONNECTION_FAILURE)
      {
        // This is a legitimate error, not a server failure, so we
        // shouldn't retry.
        break;
      }
      replica_idx = 0;
      LOG_WARNING("Failed to read from sole memcached replica: retrying once");
    }
    else
    {
      replica_idx = ii;
    }
    // We must use memcached_mget because memcached_get does not retrieve CAS
    // values.
    LOG_DEBUG("Attempt to read from replica %d (connection %p)", replica_idx, replicas[replica_idx]);
    rc = memcached_mget(replicas[replica_idx], &key_ptr, &key_len, 1);

    if (memcached_success(rc))
    {
      // memcached_mget command was successful, so retrieve the result.
      LOG_DEBUG("Fetch result");
      memcached_result_st result;
      memcached_result_create(replicas[replica_idx], &result);
      memcached_fetch_result(replicas[replica_idx], &result, &rc);

      if (memcached_success(rc))
      {
        // Found a record, so exit the read loop.
        LOG_DEBUG("Found record on replica %d", replica_idx);
        data.assign(memcached_result_value(&result), memcached_result_length(&result));
        cas = (active_not_found) ? 0 : memcached_result_cas(&result);

        // std::string::assign copies its arguments when used with a
        // char*, so this is safe.
        memcached_result_free(&result);
        break;
      }
      else
      {
        // Free the result and continue the read loop.
        memcached_result_free(&result);
      }
    }


    if (rc == MEMCACHED_NOTFOUND)
    {
      // Failed to find a record on an active replica.  Flag this so if we do
      // find data on a later replica we can reset the cas value returned to
      // zero to ensure a subsequent write will succeed.
      LOG_DEBUG("Read for %s on replica %d returned NOTFOUND", fqkey.c_str(), replica_idx);
      active_not_found = true;
    }
    else
    {
      // Error from this node, so consider it inactive.
      LOG_DEBUG("Read for %s on replica %d returned error %d (%s)",
                fqkey.c_str(), replica_idx, rc, memcached_strerror(replicas[replica_idx], rc));
      ++failed_replicas;
    }
  }

  if (memcached_success(rc))
  {
    if (trail != 0)
    {
      SAS::Event got_data(trail, SASEvent::MEMCACHED_GET_SUCCESS, 0);
      got_data.add_var_param(fqkey);
      got_data.add_var_param(data);
      got_data.add_static_param(cas);
      SAS::report_event(got_data);
    }
    // Return the data and CAS value.  The CAS value is either set to the CAS
    // value from the result, or zero if an earlier active replica returned
    // NOT_FOUND.  This ensures that a subsequent set operation will succeed
    // on the earlier active replica.
    LOG_DEBUG("Read %d bytes from table %s key %s, CAS = %ld",
              data.length(), table.c_str(), key.c_str(), cas);

  }
  else if (failed_replicas < replicas.size())
  {
    // At least one replica returned NOT_FOUND.
    if (trail != 0)
    {
      SAS::Event not_found(trail, SASEvent::MEMCACHED_GET_NOT_FOUND, 0);
      not_found.add_var_param(fqkey);
      SAS::report_event(not_found);
    }

    LOG_DEBUG("At least one replica returned not found, so return NOT_FOUND");
    status = Store::Status::NOT_FOUND;
  }
  else
  {
    // All replicas returned an error, so log the error and return the
    // failure.
    if (trail != 0)
    {
      SAS::Event err(trail, SASEvent::MEMCACHED_GET_ERROR, 0);
      err.add_var_param(fqkey);
      SAS::report_event(err);
    }

    LOG_ERROR("Failed to read data for %s from %d replicas",
              fqkey.c_str(), replicas.size());
    status = Store::Status::ERROR;
  }

  return status;
}
Exemplo n.º 10
0
memcached_return_t BaseMemcachedStore::add_overwriting_tombstone(memcached_st* replica,
                                                                 const char* key_ptr,
                                                                 const size_t key_len,
                                                                 const uint32_t vbucket,
                                                                 const std::string& data,
                                                                 time_t memcached_expiration,
                                                                 uint32_t flags,
                                                                 SAS::TrailId trail)
{
  memcached_return_t rc;
  uint64_t cas = 0;

  TRC_DEBUG("Attempting to add data for key %.*s", key_len, key_ptr);

  // Convert the key into a std::string (sas-client does not like that
  // key_{ptr,len} are constant).
  const std::string key(key_ptr, key_len);

  while (true)
  {
    if (cas == 0)
    {
      TRC_DEBUG("Attempting memcached ADD command");
      rc = memcached_add_vb(replica,
                            key_ptr,
                            key_len,
                            _binary ? vbucket : 0,
                            data.data(),
                            data.length(),
                            memcached_expiration,
                            flags);
    }
    else
    {
      TRC_DEBUG("Attempting memcached CAS command (cas = %d)", cas);
      rc = memcached_cas_vb(replica,
                            key_ptr,
                            key_len,
                            _binary ? vbucket : 0,
                            data.data(),
                            data.length(),
                            memcached_expiration,
                            flags,
                            cas);
    }

    if ((rc == MEMCACHED_DATA_EXISTS) ||
        (rc == MEMCACHED_NOTSTORED))
    {
      // A record with this key already exists. If it is a tombstone, we need
      // to overwrite it. Get the record to see what it is.
      memcached_return_t get_rc;
      std::string existing_data;

      TRC_DEBUG("Existing data prevented the ADD/CAS."
                "Issue GET to see if we need to overwrite a tombstone");
      get_rc = get_from_replica(replica, key_ptr, key_len, existing_data, cas);

      if (memcached_success(get_rc))
      {
        if (existing_data != TOMBSTONE)
        {
          // The existing record is not a tombstone.  We mustn't overwrite
          // this, so break out of the loop and return the original return code
          // from the ADD/CAS.
          TRC_DEBUG("Found real data. Give up");
          break;
        }
        else
        {
          // The existing record IS a tombstone. Go round the loop again to
          // overwrite it. `cas` has been set to the cas of the tombstone.
          TRC_DEBUG("Found a tombstone. Attempt to overwrite");

          if (trail != 0)
          {
            SAS::Event event(trail, SASEvent::MEMCACHED_SET_BLOCKED_BY_TOMBSTONE, 0);
            event.add_var_param(key);
            event.add_static_param(cas);
            SAS::report_event(event);
          }
        }
      }
      else if (get_rc == MEMCACHED_NOTFOUND)
      {
        // The GET returned that there is no record for this key. This can
        // happen if the record has expired. We need to try again (it could
        // have been a tombstone which should not block adds).
        TRC_DEBUG("GET failed with NOT_FOUND");

        if (trail != 0)
        {
          SAS::Event event(trail, SASEvent::MEMCACHED_SET_BLOCKED_BY_EXPIRED, 0);
          event.add_var_param(key);
          SAS::report_event(event);
        }
      }
      else
      {
        // The replica failed. Return the return code from the original ADD/CAS.
        TRC_DEBUG("GET failed, rc = %d (%s)\n%s",
                  get_rc,
                  memcached_strerror(replica, get_rc),
                  memcached_last_error_message(replica));
        break;
      }
    }
    else
    {
      TRC_DEBUG("ADD/CAS returned rc = %d (%s)\n%s",
                rc,
                memcached_strerror(replica, rc),
                memcached_last_error_message(replica));
      break;
    }
  }

  return rc;
}
Exemplo n.º 11
0
/// Update the data for the specified namespace and key.  Writes the data
/// atomically, so if the underlying data has changed since it was last
/// read, the update is rejected and this returns Store::Status::CONTENTION.
Store::Status BaseMemcachedStore::set_data(const std::string& table,
                                           const std::string& key,
                                           const std::string& data,
                                           uint64_t cas,
                                           int expiry,
                                           SAS::TrailId trail)
{
  Store::Status status = Store::Status::OK;

  TRC_DEBUG("Writing %d bytes to table %s key %s, CAS = %ld, expiry = %d",
            data.length(), table.c_str(), key.c_str(), cas, expiry);

  // Construct the fully qualified key.
  std::string fqkey = table + "\\\\" + key;
  const char* key_ptr = fqkey.data();
  const size_t key_len = fqkey.length();

  int vbucket = vbucket_for_key(fqkey);
  const std::vector<memcached_st*>& replicas = get_replicas(vbucket, Op::WRITE);

  if (trail != 0)
  {
    SAS::Event start(trail, SASEvent::MEMCACHED_SET_START, 0);
    start.add_var_param(fqkey);
    start.add_var_param(data);
    start.add_static_param(cas);
    start.add_static_param(expiry);
    SAS::report_event(start);
  }

  TRC_DEBUG("%d write replicas for key %s", replicas.size(), fqkey.c_str());

  // Calculate a timestamp (least-significant 32 bits of milliseconds since the
  // epoch) for the current time.  We store this in the flags field to allow us
  // to resolve conflicts when resynchronizing between memcached servers.
  struct timespec ts;
  (void)clock_gettime(CLOCK_REALTIME, &ts);
  uint32_t flags = (uint32_t)((ts.tv_sec * 1000) + (ts.tv_nsec / 1000000));

  // Memcached uses a flexible mechanism for specifying expiration.
  // - 0 indicates never expire.
  // - <= MEMCACHED_EXPIRATION_MAXDELTA indicates a relative (delta) time.
  // - > MEMCACHED_EXPIRATION_MAXDELTA indicates an absolute time.
  // Absolute time is the only way to force immediate expiry.  Unfortunately,
  // it's not reliable - see https://github.com/Metaswitch/cpp-common/issues/160
  // for details.  Instead, we use relative time for future times (expiry > 0)
  // and the earliest absolute time for immediate expiry (expiry == 0).
  time_t memcached_expiration =
    (time_t)((expiry > 0) ? expiry : MEMCACHED_EXPIRATION_MAXDELTA + 1);

  // First try to write the primary data record to the first responding
  // server.
  memcached_return_t rc = MEMCACHED_ERROR;
  size_t ii;
  size_t replica_idx;

  // If we only have one replica, we should try it twice -
  // libmemcached won't notice a dropped TCP connection until it tries
  // to make a request on it, and will fail the request then
  // reconnect, so the second attempt could still work.
  size_t attempts = (replicas.size() == 1) ? 2: replicas.size();

  for (ii = 0; ii < attempts; ++ii)
  {
    if ((replicas.size() == 1) && (ii == 1)) {
      if (rc != MEMCACHED_CONNECTION_FAILURE)
      {
        // This is a legitimate error, not a transient server failure, so we
        // shouldn't retry.
        break;
      }
      replica_idx = 0;
      TRC_WARNING("Failed to write to sole memcached replica: retrying once");
    }
    else
    {
      replica_idx = ii;
    }

    TRC_DEBUG("Attempt conditional write to vbucket %d on replica %d (connection %p), CAS = %ld, expiry = %d",
              vbucket,
              replica_idx,
              replicas[replica_idx],
              cas,
              expiry);

    if (cas == 0)
    {
      // New record, so attempt to add (but overwrite any tombstones we
      // encounter).  This will fail if someone else got there first and some
      // data already exists in memcached for this key.
      rc = add_overwriting_tombstone(replicas[replica_idx],
                                     key_ptr,
                                     key_len,
                                     vbucket,
                                     data,
                                     memcached_expiration,
                                     flags,
                                     trail);
    }
    else
    {
      // This is an update to an existing record, so use memcached_cas
      // to make sure it is atomic.
      rc = memcached_cas_vb(replicas[replica_idx],
                            key_ptr,
                            key_len,
                            _binary ? vbucket : 0,
                            data.data(),
                            data.length(),
                            memcached_expiration,
                            flags,
                            cas);

      if (!memcached_success(rc))
      {
        TRC_DEBUG("memcached_cas command failed, rc = %d (%s)\n%s",
                  rc,
                  memcached_strerror(replicas[replica_idx], rc),
                  memcached_last_error_message(replicas[replica_idx]));
      }
    }

    if (memcached_success(rc))
    {
      TRC_DEBUG("Conditional write succeeded to replica %d", replica_idx);
      break;
    }
    else if ((rc == MEMCACHED_NOTSTORED) ||
             (rc == MEMCACHED_DATA_EXISTS))
    {
      if (trail != 0)
      {
        SAS::Event err(trail, SASEvent::MEMCACHED_SET_CONTENTION, 0);
        err.add_var_param(fqkey);
        SAS::report_event(err);
      }

      // A NOT_STORED or EXISTS response indicates a concurrent write failure,
      // so return this to the application immediately - don't go on to
      // other replicas.
      TRC_INFO("Contention writing data for %s to store", fqkey.c_str());
      status = Store::Status::DATA_CONTENTION;
      break;
    }
  }

  if ((rc == MEMCACHED_SUCCESS) &&
      (replica_idx < replicas.size()))
  {
    // Write has succeeded, so write unconditionally (and asynchronously)
    // to the replicas.
    for (size_t jj = replica_idx + 1; jj < replicas.size(); ++jj)
    {
      TRC_DEBUG("Attempt unconditional write to replica %d", jj);
      memcached_behavior_set(replicas[jj], MEMCACHED_BEHAVIOR_NOREPLY, 1);
      memcached_set_vb(replicas[jj],
                      key_ptr,
                      key_len,
                      _binary ? vbucket : 0,
                      data.data(),
                      data.length(),
                      memcached_expiration,
                      flags);
      memcached_behavior_set(replicas[jj], MEMCACHED_BEHAVIOR_NOREPLY, 0);
    }
  }

  if ((!memcached_success(rc)) &&
      (rc != MEMCACHED_NOTSTORED) &&
      (rc != MEMCACHED_DATA_EXISTS))
  {
    if (trail != 0)
    {
      SAS::Event err(trail, SASEvent::MEMCACHED_SET_FAILED, 0);
      err.add_var_param(fqkey);
      SAS::report_event(err);
    }

    update_vbucket_comm_state(vbucket, FAILED);

    if (_comm_monitor)
    {
      _comm_monitor->inform_failure();
    }

    TRC_ERROR("Failed to write data for %s to %d replicas",
              fqkey.c_str(), replicas.size());
    status = Store::Status::ERROR;
  }
  else
  {
    update_vbucket_comm_state(vbucket, OK);

    if (_comm_monitor)
    {
      _comm_monitor->inform_success();
    }
  }

  return status;
}
Exemplo n.º 12
0
/// Retrieve the data for a given namespace and key.
Store::Status BaseMemcachedStore::get_data(const std::string& table,
                                           const std::string& key,
                                           std::string& data,
                                           uint64_t& cas,
                                           SAS::TrailId trail)
{
  Store::Status status;

  // Construct the fully qualified key.
  std::string fqkey = table + "\\\\" + key;
  const char* key_ptr = fqkey.data();
  const size_t key_len = fqkey.length();

  int vbucket = vbucket_for_key(fqkey);
  const std::vector<memcached_st*>& replicas = get_replicas(vbucket, Op::READ);

  if (trail != 0)
  {
    SAS::Event start(trail, SASEvent::MEMCACHED_GET_START, 0);
    start.add_var_param(fqkey);
    SAS::report_event(start);
  }

  TRC_DEBUG("%d read replicas for key %s", replicas.size(), fqkey.c_str());

  // Read from all replicas until we get a positive result.
  memcached_return_t rc = MEMCACHED_ERROR;
  bool active_not_found = false;
  size_t failed_replicas = 0;
  size_t ii;

  // If we only have one replica, we should try it twice -
  // libmemcached won't notice a dropped TCP connection until it tries
  // to make a request on it, and will fail the request then
  // reconnect, so the second attempt could still work.
  size_t attempts = (replicas.size() == 1) ? 2 : replicas.size();

  for (ii = 0; ii < attempts; ++ii)
  {
    size_t replica_idx;
    if ((replicas.size() == 1) && (ii == 1))
    {
      if (rc != MEMCACHED_CONNECTION_FAILURE)
      {
        // This is a legitimate error, not a server failure, so we
        // shouldn't retry.
        break;
      }
      replica_idx = 0;
      TRC_WARNING("Failed to read from sole memcached replica: retrying once");
    }
    else
    {
      replica_idx = ii;
    }

    TRC_DEBUG("Attempt to read from replica %d (connection %p)",
              replica_idx,
              replicas[replica_idx]);
    rc = get_from_replica(replicas[replica_idx], key_ptr, key_len, data, cas);

    if (memcached_success(rc))
    {
      // Got data back from this replica. Don't try any more.
      TRC_DEBUG("Read for %s on replica %d returned SUCCESS",
                fqkey.c_str(),
                replica_idx);
      break;
    }
    else if (rc == MEMCACHED_NOTFOUND)
    {
      // Failed to find a record on an active replica.  Flag this so if we do
      // find data on a later replica we can reset the cas value returned to
      // zero to ensure a subsequent write will succeed.
      TRC_DEBUG("Read for %s on replica %d returned NOTFOUND", fqkey.c_str(), replica_idx);
      active_not_found = true;
    }
    else
    {
      // Error from this node, so consider it inactive.
      TRC_DEBUG("Read for %s on replica %d returned error %d (%s)",
                fqkey.c_str(), replica_idx, rc, memcached_strerror(replicas[replica_idx], rc));
      ++failed_replicas;
    }
  }

  if (memcached_success(rc))
  {
    if (data != TOMBSTONE)
    {
      if (trail != 0)
      {
        SAS::Event got_data(trail, SASEvent::MEMCACHED_GET_SUCCESS, 0);
        got_data.add_var_param(fqkey);
        got_data.add_var_param(data);
        got_data.add_static_param(cas);
        SAS::report_event(got_data);
      }

      // Return the data and CAS value.  The CAS value is either set to the CAS
      // value from the result, or zero if an earlier active replica returned
      // NOT_FOUND.  This ensures that a subsequent set operation will succeed
      // on the earlier active replica.
      if (active_not_found)
      {
        cas = 0;
      }

      TRC_DEBUG("Read %d bytes from table %s key %s, CAS = %ld",
                data.length(), table.c_str(), key.c_str(), cas);
      status = Store::OK;
    }
    else
    {
      if (trail != 0)
      {
        SAS::Event got_tombstone(trail, SASEvent::MEMCACHED_GET_TOMBSTONE, 0);
        got_tombstone.add_var_param(fqkey);
        got_tombstone.add_static_param(cas);
        SAS::report_event(got_tombstone);
      }

      // We have read a tombstone. Return NOT_FOUND to the caller, and also
      // zero out the CAS (returning a zero CAS makes the interface cleaner).
      TRC_DEBUG("Read tombstone from table %s key %s, CAS = %ld",
                table.c_str(), key.c_str(), cas);
      cas = 0;
      status = Store::NOT_FOUND;
    }

    // Regardless of whether we got a tombstone, the vbucket is alive.
    update_vbucket_comm_state(vbucket, OK);

    if (_comm_monitor)
    {
      _comm_monitor->inform_success();
    }
  }
  else if (failed_replicas < replicas.size())
  {
    // At least one replica returned NOT_FOUND.
    if (trail != 0)
    {
      SAS::Event not_found(trail, SASEvent::MEMCACHED_GET_NOT_FOUND, 0);
      not_found.add_var_param(fqkey);
      SAS::report_event(not_found);
    }

    TRC_DEBUG("At least one replica returned not found, so return NOT_FOUND");
    status = Store::Status::NOT_FOUND;

    update_vbucket_comm_state(vbucket, OK);

    if (_comm_monitor)
    {
      _comm_monitor->inform_success();
    }
  }
  else
  {
    // All replicas returned an error, so log the error and return the
    // failure.
    if (trail != 0)
    {
      SAS::Event err(trail, SASEvent::MEMCACHED_GET_ERROR, 0);
      err.add_var_param(fqkey);
      SAS::report_event(err);
    }

    TRC_ERROR("Failed to read data for %s from %d replicas",
              fqkey.c_str(), replicas.size());
    status = Store::Status::ERROR;

    update_vbucket_comm_state(vbucket, FAILED);

    if (_comm_monitor)
    {
      _comm_monitor->inform_failure();
    }
  }

  return status;
}