Example #1
0
/**
 * paxos_sync - GEvent-friendly wrapper around proposer_sync.
 *
 * @data: pointer to the ID (pax_uuid_t) of the session to sync.  We are
 *        parametrized with it when we are added to the main event loop.
 *
 * Makes the named session the current one and, if we are its proposer, runs
 * proposer_sync().  Always returns TRUE.
 */
int
paxos_sync(void *data)
{
  // Look up the session by ID and make it the current one.
  pax = session_find(&state.sessions, (pax_uuid_t *)data);

  // Nothing to do unless we are the proposer.
  if (!is_proposer()) {
    return TRUE;
  }

  proposer_sync();
  return TRUE;
}
Example #2
0
/**
 * do_continue_ack_redirect - Continue acknowledging a redirect once the
 * attempt to reconnect to the purported proposer has finished.
 *
 * If the connection was reestablished, we relinquish our proposership, clear
 * our defer list, and reintroduce ourselves.  Otherwise, we try preparing
 * again.
 *
 * @chan: channel passed to paxos_peer_init() to register the reconnection.
 * @acc:  the purported proposer; asserted to have a lower paxid than ours.
 * @k:    continuation state; not used by this function.
 *
 * Returns 0 if @acc already had a peer, the result of paxos_hello() if the
 * reconnection was registered, and the result of proposer_prepare() if not.
 */
int
do_continue_ack_redirect(GIOChannel *chan, struct paxos_acceptor *acc,
    struct paxos_continuation *k)
{
  // The acceptor we were redirected to must outrank us.
  assert(acc->pa_paxid < pax->self_id);

  // A peer already exists for this acceptor, so the connection was
  // reestablished earlier.  We should have stopped being the proposer by
  // now and have nothing left to do.
  if (acc->pa_peer != NULL) {
    assert(!is_proposer());
    return 0;
  }

  // The old prepare is dropped whether or not the reconnection works out.
  g_free(pax->prep);
  pax->prep = NULL;

  // Try to register the reconnection.
  acc->pa_peer = paxos_peer_init(chan);
  if (acc->pa_peer == NULL) {
    // No luck; prepare again, continuing to append to the defer list.
    return proposer_prepare(NULL);
  }

  // One more live acceptor.
  pax->live_count++;

  // Adopt acc as our proposer unless we have meanwhile reconnected to an
  // even higher-ranked acceptor.
  if (pax->proposer->pa_paxid > acc->pa_paxid) {
    pax->proposer = acc;
  }

  // We are done trying to prepare, so the defer list goes away.
  // XXX: Do we want to somehow pass it to the real proposer?  How do we
  // know which requests were made for us?
  instance_container_destroy(&pax->idefer);

  // Reintroduce ourselves.
  return paxos_hello(acc);
}
Example #3
0
/**
 * do_continue_ack_refuse - Continue acknowledging a refuse once the attempt
 * to reconnect to the purported proposer has finished.
 *
 * If the connection was reestablished, we reset our proposer, reintroduce
 * ourselves, and resend the request that was refused.
 *
 * @chan: channel passed to paxos_peer_init() to register the reconnection.
 * @acc:  the acceptor we tried to reconnect to.
 * @k:    continuation state; k->pk_data.req holds the refused request.
 *
 * Returns 0 if the reconnection is ignored or could not be registered;
 * otherwise the status accumulated (via ERR_ACCUM) from paxos_hello() and,
 * if the request is resent, paxos_send_to_proposer().
 */
int
do_continue_ack_refuse(GIOChannel *chan, struct paxos_acceptor *acc,
    struct paxos_continuation *k)
{
  int status = 0;
  struct paxos_header hdr;
  struct paxos_request *resend;
  struct yakyak msg;

  // A proposer that has finished preparing treats every higher-ranked
  // acceptor as dead.  Their parts may not have gone through yet, though,
  // so reconnection attempts are explicitly ignored here.
  if (is_proposer() && pax->prep == NULL) {
    return 0;
  }

  // Try to register the reconnection; without a peer there is nothing more
  // to do.
  acc->pa_peer = paxos_peer_init(chan);
  if (acc->pa_peer == NULL) {
    return status;
  }

  // One more live acceptor.
  pax->live_count++;

  // We dispatched as an acceptor when acknowledging the refuse, but may have
  // become the proposer by the time we continue here.  If we are mid-prepare,
  // give up on it.  Should the acceptor we just reconnected to fail, we will
  // learn of the drop and reprepare then.
  g_free(pax->prep);
  pax->prep = NULL;
  instance_container_destroy(&pax->idefer);

  // Reintroduce ourselves.
  ERR_ACCUM(status, paxos_hello(acc));

  // If we have since reconnected to an even higher-ranked acceptor, keep it
  // as proposer and do not resend.
  if (acc->pa_paxid >= pax->proposer->pa_paxid) {
    return status;
  }
  pax->proposer = acc;

  // Resend our request to the new proposer.
  // XXX: What about the problematic case where A is connected to B, B
  // thinks it's the proposer and accepts A's request, but in fact B is not
  // the proposer and C, the real proposer, gets neither of their requests?
  header_init(&hdr, OP_REQUEST, pax->proposer->pa_paxid);

  // Use the copy in the request cache if there is one, else the copy stored
  // in the continuation.
  resend = request_find(&pax->rcache, k->pk_data.req.pr_val.pv_reqid);
  if (resend == NULL) {
    resend = &k->pk_data.req;
  }

  yakyak_init(&msg, 2);
  paxos_header_pack(&msg, &hdr);
  paxos_request_pack(&msg, resend);

  ERR_ACCUM(status, paxos_send_to_proposer(&msg));
  yakyak_destroy(&msg);

  return status;
}
Example #4
0
/**
 * paxos_commit - Commit a value for an instance of the Paxos protocol.
 *
 * We totally order calls to paxos_learn by instance number in order to make
 * the join and greet protocols behave properly.  This also gives our chat
 * clients an easy mechanism for totally ordering their logs without extra
 * work on their part.
 *
 * It is possible that failed DEC_PART decrees (i.e., decrees in which the
 * proposer attempts to disconnect an acceptor who a majority of acceptors
 * believe is still alive) could delay the learning of committed chat
 * messages.  To avoid this, once a proposer receives enough rejections
 * of the decree, the part decree is replaced with a null decree.  The
 * proposer can then issue the part again with a higher instance number
 * if desired.
 *
 * @inst: the instance to commit.  Its pi_committed flag is always set;
 *        its pi_cached flag is set once any request it needs has been found
 *        in the request cache (or if it needs none).
 *
 * Returns the result of paxos_retrieve() if a needed request is missing,
 * the result of acceptor_retry() if we are not the proposer and the hole is
 * missing or uncommitted, and 0 otherwise.  A paxos_learn() call may also
 * end the function early through ERR_RET (defined elsewhere; presumably it
 * returns on a nonzero result -- confirm against its definition).
 */
int
paxos_commit(struct paxos_instance *inst)
{
  int r;
  struct paxos_request *req = NULL;
  struct paxos_instance *it;

  // Mark the commit.
  inst->pi_committed = true;

  // Pull the request from the request cache if applicable.
  if (request_needs_cached(inst->pi_val.pv_dkind)) {
    req = request_find(&pax->rcache, inst->pi_val.pv_reqid);

    // If we can't find a request and need one, send out a retrieve to the
    // request originator and defer the commit.  Note that pi_cached stays
    // unset on this path.
    if (req == NULL) {
      return paxos_retrieve(inst);
    }
  }

  // Mark the cache.
  inst->pi_cached = true;

  // We should already have committed and learned everything before the hole.
  assert(inst->pi_hdr.ph_inum >= pax->ihole);

  // Since we want our learns to be totally ordered, if we didn't just fill
  // the hole, we cannot learn.
  if (inst->pi_hdr.ph_inum != pax->ihole) {
    // If we're the proposer, we have to just wait it out.
    if (is_proposer()) {
      return 0;
    }

    // If the hole has committed but is just waiting on a retrieve, we'll learn
    // when we receive the resend.
    if (pax->istart->pi_hdr.ph_inum == pax->ihole && pax->istart->pi_committed) {
      assert(!pax->istart->pi_cached);
      return 0;
    }

    // The hole is either missing or uncommitted and we are not the proposer,
    // so issue a retry.
    return acceptor_retry(pax->ihole);
  }

  // Set pax->istart to point to the instance numbered pax->ihole.  If it
  // does not already, we advance by exactly one list entry and rely on the
  // assertion below to catch any other arrangement.
  if (pax->istart->pi_hdr.ph_inum != pax->ihole) {
    pax->istart = LIST_NEXT(pax->istart, pi_le);
  }
  assert(pax->istart->pi_hdr.ph_inum == pax->ihole);

  // Now learn as many contiguous commits as we can.  This function is the
  // only path by which we learn commits, and we always learn in contiguous
  // blocks.  Therefore, it is an invariant of our system that all the
  // instances numbered lower than pax->ihole are learned and committed, and
  // none of the instances geq to pax->ihole are learned (although some may
  // be committed).
  //
  // We iterate over the instance list, detecting and breaking if we find a
  // hole and learning whenever we don't.  pax->ihole is advanced only in the
  // loop's increment clause, i.e., only after an instance has been learned;
  // every break leaves it at the first unlearned instance number.
  for (it = pax->istart; ; it = LIST_NEXT(it, pi_le), ++pax->ihole) {
    // If we reached the end of the list, set pax->istart to the last existing
    // instance.
    if (it == (void *)&pax->ilist) {
      pax->istart = LIST_LAST(&pax->ilist);
      break;
    }

    // If we skipped over an instance number because we were missing an
    // instance, set pax->istart to the last instance before the hole.
    if (it->pi_hdr.ph_inum != pax->ihole) {
      pax->istart = LIST_PREV(it, pi_le);
      break;
    }

    // If we found an uncommitted or uncached instance, set pax->istart to it.
    if (!it->pi_committed || !it->pi_cached) {
      pax->istart = it;
      break;
    }

    // By our invariant, since we are past our original hole, no instance
    // should be learned.
    assert(!it->pi_learned);

    // Grab its associated request.  This is guaranteed to exist because we
    // have checked that pi_cached holds.
    req = NULL;
    if (request_needs_cached(it->pi_val.pv_dkind)) {
      req = request_find(&pax->rcache, it->pi_val.pv_reqid);
      assert(req != NULL);
    }

    // Learn the value.
    // NOTE(review): paxos_learn() may tear down the session when the learned
    // decree parts us; confirm that pax (and `it`) are still valid for the
    // loop increment in that case, or that ERR_RET returns before it runs.
    ERR_RET(r, paxos_learn(it, req));
  }

  return 0;
}
Example #5
0
/**
 * paxos_learn - Do something useful with the value of a commit.
 *
 * Note that we cannot free up the instance or any request associated with
 * it until a sync.
 */
int
paxos_learn(struct paxos_instance *inst, struct paxos_request *req)
{
  int r = 0;
  struct paxos_acceptor *acc;

  // Mark the learn.
  inst->pi_learned = true;

  // Act on the decree (e.g., display chat, record acceptor list changes).
  switch (inst->pi_val.pv_dkind) {
    case DEC_NULL:
      break;

    case DEC_CHAT:
      // Grab the message sender.
      acc = acceptor_find(&pax->alist, req->pr_val.pv_reqid.id);
      assert(acc != NULL);

      // Invoke client learning callback.
      state.learn.chat(req->pr_data, req->pr_size, acc->pa_desc, acc->pa_size,
          pax->client_data);
      break;

    case DEC_JOIN:
      // Check the adefer list to see if we received a hello already for the
      // newly joined acceptor.
      acc = acceptor_find(&pax->adefer, inst->pi_hdr.ph_inum);

      if (acc != NULL) {
        // We found a deferred hello.  To complete the hello, just move our
        // acceptor over to the alist and increment the live count.
        LIST_REMOVE(&pax->adefer, acc, pa_le);
        pax->live_count++;
      } else {
        // We have not yet gotten the hello, so create a new acceptor.
        acc = g_malloc0(sizeof(*acc));
        acc->pa_paxid = inst->pi_hdr.ph_inum;
      }
      acceptor_insert(&pax->alist, acc);

      // Copy over the identity information.
      acc->pa_size = req->pr_size;
      acc->pa_desc = g_memdup(req->pr_data, req->pr_size);

      // If we are the proposer, we are responsible for connecting to the new
      // acceptor, as well as for sending the new acceptor its paxid and other
      // initial data.
      if (is_proposer()) {
        proposer_welcome(acc);
      }

      // Invoke client learning callback.
      state.learn.join(req->pr_data, req->pr_size, acc->pa_desc, acc->pa_size,
          pax->client_data);
      break;

    case DEC_PART:
    case DEC_KILL:
      // Grab the acceptor from the alist.
      acc = acceptor_find(&pax->alist, inst->pi_val.pv_extra);
      if (acc == NULL) {
        // It is possible that we may part twice; for instance, if a proposer
        // issues a part for itself but its departure from the system is
        // detected by acceptors before the part commit is received.  In this
        // case, just do nothing.
        break;
      }

      // Invoke client learning callback.
      state.learn.part(acc->pa_desc, acc->pa_size, acc->pa_desc, acc->pa_size,
          pax->client_data);

      // If we are being parted, leave the protocol.
      if (acc->pa_paxid == pax->self_id) {
        return paxos_end(pax);
      }

      // Take the parted acceptor off the list and do accounting if it was
      // still live.
      LIST_REMOVE(&pax->alist, acc, pa_le);
      if (acc->pa_peer != NULL) {
        pax->live_count--;
      }

      // If we just parted our proposer, "elect" a new one.  If it's us, send
      // a prepare.
      if (acc->pa_paxid == pax->proposer->pa_paxid) {
        reset_proposer();
        if (is_proposer()) {
          r = proposer_prepare(acc);
        }
      }

      // Free the parted acceptor.
      acceptor_destroy(acc);

      break;
  }

  return r;
}
Example #6
0
/**
 * paxos_request - Request that the proposer make a decree for us.
 *
 * If the request has data attached to it, we broadcast an out-of-band message
 * to all acceptors, asking that they cache our message until the proposer
 * commits it.
 *
 * We send the request as a header along with a two-object array consisting
 * of a paxos_value (itself an array) and a msgpack raw (i.e., a data
 * string).
 *
 * @session: the session to make the request in, as returned to the client
 *           when the session was created.  Becomes the current session.
 * @dkind:   the kind of decree being requested.
 * @msg:     data to attach to the request; copied, so the caller keeps
 *           ownership of its buffer.
 * @len:     size of @msg in bytes.
 *
 * Ownership of the allocated request: if it needs caching, it is inserted
 * into the request cache; if we are the proposer, it is handed to
 * proposer_decree_request() (NOTE(review): presumed to take over or copy
 * the request -- confirm).  In the remaining case -- an uncached request
 * from a non-proposer -- nothing else refers to it after the send, so we
 * free it before returning.
 *
 * Returns 1 if @session is NULL, a nonzero send status if sending failed,
 * the result of proposer_decree_request() if we are the proposer, and 0
 * otherwise.
 */
int
paxos_request(struct paxos_session *session, dkind_t dkind, const void *msg,
    size_t len)
{
  int r = 0;
  int needs_cached;
  struct paxos_header hdr;
  struct paxos_request *req;
  struct paxos_yak py;

  // Set the session.  The client should pass us a pointer to the correct
  // session object which we returned when the session was created.
  pax = session;

  // We can't make requests if we're not part of a protocol.
  if (pax == NULL) {
    return 1;
  }

  // Do we need to cache this request?
  needs_cached = request_needs_cached(dkind);

  // Initialize a header.  We overload ph_inum to the ID of the acceptor who
  // we believe to be the proposer.
  header_init(&hdr, OP_REQUEST, pax->proposer->pa_paxid);

  // Allocate a request and initialize it.
  req = g_malloc0(sizeof(*req));
  req->pr_val.pv_dkind = dkind;
  req->pr_val.pv_reqid.id = pax->self_id;
  req->pr_val.pv_reqid.gen = (++pax->req_id);  // Increment our req_id.
  req->pr_val.pv_extra = 0; // Always 0 for requests.

  req->pr_size = len;
  req->pr_data = g_memdup(msg, len);

  // Add it to the request cache if needed.  From here on the cache holds a
  // reference to a cached request, so we must not free it ourselves.
  if (needs_cached) {
    request_insert(&pax->rcache, req);
  }

  if (!is_proposer() || needs_cached) {
    // We need to send iff either we are not the proposer or the request
    // has nontrivial data.
    paxos_payload_init(&py, 2);
    paxos_header_pack(&py, &hdr);
    paxos_request_pack(&py, req);

    // Broadcast only if it needs caching.
    if (!needs_cached) {
      r = paxos_send_to_proposer(&py);
    } else {
      r = paxos_broadcast(&py);
    }

    paxos_payload_destroy(&py);
    if (r) {
      goto done;
    }
  }

  // Decree the request if we're the proposer.
  if (is_proposer()) {
    return proposer_decree_request(req);
  }

done:
  // We get here only without having decreed the request.  If it was not put
  // in the request cache either, we hold the sole reference and must release
  // it; previously it was leaked on both the success and the send-failure
  // paths.
  if (!needs_cached) {
    g_free(req->pr_data);
    g_free(req);
  }

  return r;
}