/**
 * acceptor_ack_refuse - Handle a refuse sent back in answer to a request.
 *
 * A refuse arrives when we addressed a request to some acceptor as if it
 * were the proposer and that acceptor disagreed.  Because Paxos keeps the
 * acceptor list in one consistent total order everywhere, a refuse implies
 * that a better-ranked candidate for proposer exists than the one we picked.
 * The refuse names that candidate in hdr->ph_inum.
 *
 * As with ack_redirect, the refuse may simply be stale: we may have spotted
 * a proposer failure and correctly contacted the successor before the
 * successor itself noticed the failure.
 *
 * @hdr: header of the refuse; ph_inum carries the ID of the purported
 *       proposer.
 * @o:   msgpack array; the element at index 1 is unpacked as the value of
 *       the request to be resent.
 *
 * Returns 0 when the refuse is ignored or the reconnect was started;
 * otherwise whatever ERR_RET propagates from state.connect().
 */
int acceptor_ack_refuse(struct paxos_header *hdr, msgpack_object *o)
{
  int err;

  // Nothing to do if the proposer we currently recognize already ranks at
  // least as high as the one named here (e.g. thanks to an earlier
  // redirect) -- a lower ID means a better rank.
  if (hdr->ph_inum >= pax->proposer->pa_paxid) {
    return 0;
  }

  // Look up the acceptor named as the real proposer.  Its pa_peer is
  // expected to be NULL already, marking the connection as lost.
  struct paxos_acceptor *target = acceptor_find(&pax->alist, hdr->ph_inum);
  assert(target->pa_peer == NULL);

  // The rest of the work happens in continue_ack_refuse once the client has
  // attempted the connection; on success the request gets resent.  The
  // request value travels along as the continuation's callback data.
  struct paxos_continuation *cont =
      continuation_new(continue_ack_refuse, target->pa_paxid);

  assert(o->type == MSGPACK_OBJECT_ARRAY);
  paxos_value_unpack(&cont->pk_data.req.pr_val, &o->via.array.ptr[1]);

  ERR_RET(err, state.connect(target->pa_desc, target->pa_size, &cont->pk_cb));
  return 0;
}
/*
 * prelude__callcc - wrap the top-of-stack value together with a continuation
 * built from a copy of the machine, then hand off to prelude__call.
 *
 * Steps, in order:
 *   1. pop one value off the machine;
 *   2. copy the machine as it stands after that pop;
 *   3. build a new value with list_new() from continuation_new(copy) and the
 *      popped value;
 *   4. push that value and return the result of prelude__call().
 *
 * NOTE(review): the name suggests call-with-current-continuation semantics;
 * confirm against the definitions of list_new() and prelude__call().
 */
machine_t *prelude__callcc(machine_t *machine)
{
  object_t quoted = prelude_pop(machine);

  /* The copy must be taken after the pop so it does not include `quoted`. */
  machine_t *snapshot = machine_copy(machine);
  object_t wrapped = list_new(continuation_new(snapshot), quoted);

  prelude_push(machine, wrapped);
  return prelude__call(machine);
}
/** * proposer_ack_reject - Acknowledge an acceptor's reject. * * Increment the reject count of the appropriate Paxos instance. If we have * a majority of rejects, try to reconnect to the acceptor we attempted to * force part. If we are successful, re-decree null; otherwise, try the part * again. */ int proposer_ack_reject(struct paxos_header *hdr) { int r; struct paxos_instance *inst; struct paxos_acceptor *acc; struct paxos_continuation *k; // Our prepare succeeded, so we have only one possible ballot in our // lifetime in the system. assert(ballot_compare(hdr->ph_ballot, pax->ballot) == 0); // Find the decree of the correct instance and increment the reject count. inst = instance_find(&pax->ilist, hdr->ph_inum); inst->pi_rejects++; // Ignore the vote if we've already committed. if (inst->pi_committed) { return 0; } // We only reject parts. However, we may continue to receive rejects even // after a majority rejects, in which case we may have re-decreed null. if (inst->pi_val.pv_dkind == DEC_NULL) { return 0; } assert(inst->pi_val.pv_dkind == DEC_PART); // If we have been rejected by a majority, attempt reconnection. if (DEATH_ADJUSTED(inst->pi_rejects) >= majority()) { // See if we can reconnect to the acceptor we tried to part. acc = acceptor_find(&pax->alist, inst->pi_val.pv_extra); assert(acc->pa_peer == NULL); // Defer computation until the client performs connection. If it succeeds, // replace the part decree with a null decree; otherwise, just redecree // the part. We bind the instance number of the decree as callback data. k = continuation_new(continue_ack_reject, acc->pa_paxid); k->pk_data.inum = inst->pi_hdr.ph_inum; ERR_RET(r, state.connect(acc->pa_desc, acc->pa_size, &k->pk_cb)); return 0; } // If we have heard back from everyone but the accepts and rejects are tied, // just decree the part again. 
if (inst->pi_votes < majority() && DEATH_ADJUSTED(inst->pi_rejects) < majority() && inst->pi_votes + inst->pi_rejects == pax->live_count) { return paxos_broadcast_instance(inst); } return 0; }
/**
 * proposer_ack_redirect - Handle an acceptor telling us we are not the
 * rightful proposer.
 *
 * An acceptor that does not regard us as the true proposer answers our
 * prepare with a redirect.  Since Paxos guarantees one consistent total
 * order over the acceptor list, a redirect means a better-ranked acceptor
 * exists that we have lost contact with.
 *
 * That acceptor need not still be alive: we may have detected the old
 * proposer's failure and prepared before the redirecting acceptor noticed.
 * To guard against this we only concede once a majority has redirected us,
 * and then try to reconnect to our superior.  If instead a majority acks
 * the prepare, the former proposer is dropped even if it still has some
 * connections open.
 *
 * @hdr: header of the redirect; ph_inum carries the ID of the acceptor
 *       claimed to be the true proposer.
 * @o:   msgpack object unpacked as the header of the prepare being
 *       redirected.
 *
 * Returns 0 when the redirect is ignored, merely counted, or the reconnect
 * was started; the result of proposer_prepare() on a tie; or whatever
 * ERR_RET propagates from state.connect().
 */
int proposer_ack_redirect(struct paxos_header *hdr, msgpack_object *o)
{
  int err;
  struct paxos_header sent_hdr;

  // We were dispatched as the proposer, so there is no need to re-check
  // that belief.  Just sanity check that the claimed true proposer has a
  // lower ID than ours, which the consistency of proposer ranks guarantees.
  assert(hdr->ph_inum < pax->self_id);

  // No prepare in flight means the prepare succeeded, so the redirect is
  // moot.  Had it been rejected instead, we would either have reached the
  // true proposer (and be dispatching as an acceptor) or have failed to
  // connect (and already issued a fresh prepare).
  if (pax->prep == NULL) {
    return 0;
  }

  // Discard redirects that answer some earlier prepare of ours.
  paxos_header_unpack(&sent_hdr, o);
  if (ballot_compare(sent_hdr.ph_ballot, pax->prep->pp_ballot) != 0) {
    return 0;
  }

  // Count the redirect against the current prepare.
  pax->prep->pp_redirects++;

  // With a majority of redirects the prepare can never succeed, so try to
  // reconnect.  The prepare is deliberately not freed until the reconnect
  // resolves; this maintains the invariant relied on above that a
  // self-identified proposer with a non-NULL prepare has either prepared
  // successfully or not yet started its prepare cycle.
  if (DEATH_ADJUSTED(pax->prep->pp_redirects) >= majority()) {
    // Use the acceptor named by this (the most recent) redirect.  A still
    // higher-ranked one may exist; we will learn that when we try to send
    // it a request.
    struct paxos_acceptor *superior = acceptor_find(&pax->alist, hdr->ph_inum);
    assert(superior->pa_peer == NULL);

    // continue_ack_redirect runs once the client has attempted the
    // connection: on success we abandon the prepare, on failure we prepare
    // again.
    struct paxos_continuation *cont =
        continuation_new(continue_ack_redirect, superior->pa_paxid);

    ERR_RET(err,
            state.connect(superior->pa_desc, superior->pa_size, &cont->pk_cb));
    return 0;
  }

  // Otherwise, start over only when every live acceptor has answered and
  // acks and redirects are deadlocked below a majority.
  if (pax->prep->pp_acks >= majority()) {
    return 0;
  }
  if (DEATH_ADJUSTED(pax->prep->pp_redirects) >= majority()) {
    return 0;
  }
  if (pax->prep->pp_acks + pax->prep->pp_redirects != pax->live_count) {
    return 0;
  }

  g_free(pax->prep);
  pax->prep = NULL;
  return proposer_prepare(NULL);
}