/*
 * Open new instances ahead of time.
 *
 * The number of instances to open is the pre-execution window size minus the
 * number of instances the proposer state already reports as prepared.  For
 * each one, a prepare request is built and sent out.  Nothing is done (and
 * nothing is logged) when the window is already full.
 *
 * NOTE(review): the original comment described the prepared instances as a
 * FIFO queue of length 128 (a slot is freed when an instance is used, and new
 * instances are appended at the tail).  That is not visible in this function;
 * confirm against the proposer implementation.
 */
static void proposer_preexecute(struct evproposer* p)
{
	prepare_req req;
	int n;
	int slots = p->preexec_window - proposer_prepared_count(p->state);

	if (slots < 1)
		return;

	for (n = slots; n > 0; n--) {
		/* Build the next prepare request, then send it. */
		proposer_prepare(p->state, &req);
		send_prepares(p, &req);
	}

	paxos_log_debug("Opened %d new instances", slots);
}
/*
 * Issue prepares for every instance that may still be opened.
 *
 * The budget is the pre-execution window size minus the number of instances
 * already prepared; a non-positive budget results in no work.
 */
static void proposer_preexecute(struct evproposer* p)
{
	prepare_req req;
	/* Number of proposals that may be started right now. */
	int budget = p->preexec_window - proposer_prepared_count(p->state);
	int sent = 0;

	while (sent < budget) {
		/* Build a prepare_req message. */
		proposer_prepare(p->state, &req);
		/* Start the proposal by sending the prepare. */
		send_prepares(p, &req);
		sent++;
	}
}
/*
 * Prepare and send up to BURST_SIZE instances.
 *
 * The number of prepares sent is BURST_SIZE minus the number of instances the
 * proposer already has prepared.  Each prepare is logged at debug level and
 * sent as a PAXOS_PREPARE message.
 */
static void proposer_preexecute(struct proposer *p)
{
	paxos_message msg;
	int remaining;

	msg.type = PAXOS_PREPARE;

	for (remaining = BURST_SIZE - proposer_prepared_count(p);
	     remaining > 0;
	     remaining--) {
		/* Fill in the prepare payload of the outgoing message. */
		proposer_prepare(p, &msg.u.prepare);
		rte_log(RTE_LOG_DEBUG, RTE_LOGTYPE_USER8,
			"%s Prepare instance %d ballot %d\n", __func__,
			msg.u.prepare.iid, msg.u.prepare.ballot);
		send_paxos_message(&msg);
	}
}
/**
 * do_continue_ack_redirect - Resume redirect handling once the attempt to
 * reconnect to the purported proposer has finished.
 *
 * If the acceptor already has a peer, there is nothing to do.  Otherwise the
 * outstanding prepare is freed and a peer is created from `chan'.  On success
 * we count the acceptor as live, adopt it as proposer if it outranks the
 * current one, drop the defer list, and send it a hello.  On failure we
 * prepare again.
 *
 * Returns 0 if the acceptor was already connected; otherwise the result of
 * paxos_hello() or proposer_prepare().  The continuation `k' is not used
 * here.
 */
int
do_continue_ack_redirect(GIOChannel *chan, struct paxos_acceptor *acc,
    struct paxos_continuation *k)
{
  // The acceptor we were redirected to must outrank us (lower ID).
  assert(acc->pa_paxid < pax->self_id);

  // Already reconnected: we should have stopped being the proposer, so there
  // is nothing left to do.
  if (acc->pa_peer != NULL) {
    assert(!is_proposer());
    return 0;
  }

  // The old prepare is dead whether or not the reconnection worked.
  g_free(pax->prep);
  pax->prep = NULL;

  // Try to register the new connection.
  acc->pa_peer = paxos_peer_init(chan);
  if (acc->pa_peer == NULL) {
    // Reconnection failed; prepare again and keep appending to the defer
    // list.
    return proposer_prepare(NULL);
  }

  // One more live acceptor.
  pax->live_count++;

  // Only switch proposers if we have not meanwhile reconnected to an even
  // higher-ranked acceptor.
  if (pax->proposer->pa_paxid > acc->pa_paxid) {
    pax->proposer = acc;
  }

  // We are done preparing, so the defer list goes away.
  // XXX: Do we want to somehow pass it to the real proposer?  How do we
  // know which requests were made for us?
  instance_container_destroy(&pax->idefer);

  // Reintroduce ourselves.
  return paxos_hello(acc);
}
/**
 * proposer_ack_redirect - Resolve an acceptor's claim that we are not the
 * true proposer.
 *
 * If we send a prepare to an acceptor who does not believe us to be the
 * true proposer, the acceptor will respond with a redirect.  Since the
 * correctness of Paxos guarantees that the acceptor list has a consistent
 * total ordering, receiving a redirect means that there is someone more
 * fitting to be proposer with whom we have lost contact.
 *
 * Note that this does not necessarily mean that the identified proposer is
 * still live; it is possible that we noticed a proposer failure and then
 * prepared before the acceptor who sent the redirect detected the failure.
 * To avoid this as much as possible, we wait for a majority of redirects
 * before accepting defeat and attempting reconnection to our superior.  If
 * we "win" with a majority completing the prepare, then we drop the former
 * proposer regardless of whether he has some connections still open.
 *
 * @hdr: header of the redirect; hdr->ph_inum is used as the ID of the
 *       acceptor the sender believes to be the proposer.
 * @o:   msgpack payload, unpacked here as the header of the prepare that is
 *       being redirected.
 *
 * Returns 0 when the redirect is ignored, merely counted, or a reconnection
 * has been started; returns the result of proposer_prepare() when acks and
 * redirects are tied and we prepare again.  ERR_RET presumably returns early
 * with an error code if state.connect() fails -- confirm against the macro's
 * definition.
 */
int proposer_ack_redirect(struct paxos_header *hdr, msgpack_object *o) {
  int r;
  struct paxos_header orig_hdr;
  struct paxos_acceptor *acc;
  struct paxos_continuation *k;

  // We dispatched as the proposer, so we do not need to check again whether
  // we think ourselves to be the proposer.  Instead, just sanity check that
  // the supposed true proposer has a lower ID than we do.  This should
  // always be the case because of the consistency of proposer ranks.
  assert(hdr->ph_inum < pax->self_id);

  // If we are not still preparing, either we succeeded or our prepare was
  // rejected.  In the former case, we should ignore the redirect because
  // we have affirmed our proposership with a majority vote.  In the latter
  // case, if we connected to the true proposer, we would have dispatched
  // as an acceptor; and if we did not successfully connect, we would have
  // sent out another prepare.  Hence, if we are not preparing, our prepare
  // succeeded and we should ignore the redirect.
  if (pax->prep == NULL) {
    return 0;
  }

  // Ensure that the redirect is for our current prepare; otherwise ignore.
  paxos_header_unpack(&orig_hdr, o);
  if (ballot_compare(orig_hdr.ph_ballot, pax->prep->pp_ballot) != 0) {
    return 0;
  }

  // Acknowledge the rejection of our prepare.
  pax->prep->pp_redirects++;

  // If we have been redirected by a majority, attempt reconnection.  If a
  // majority redirects, our prepare will never succeed, but we defer freeing
  // it until reconnection occurs.  This provides us with the guarantee (used
  // above) that an acceptor who identifies as the proposer and whose prepare
  // is NULL has either successfully prepared or has not yet begun its
  // prepare cycle.
  // NOTE(review): the original comment said "non-NULL" here; the check above
  // treats a NULL prepare as "not preparing", so "NULL" appears to be what
  // was meant -- confirm.
  if (DEATH_ADJUSTED(pax->prep->pp_redirects) >= majority()) {
    // Connect to the higher-ranked acceptor indicated in the most recent
    // redirect message we received (i.e., this one).  It's possible that an
    // even higher-ranked acceptor exists, but we'll find that out when we
    // try to send a request.
    // NOTE(review): acc is dereferenced without a NULL check; presumably
    // acceptor_find() cannot fail for a redirect target -- confirm.
    acc = acceptor_find(&pax->alist, hdr->ph_inum);
    assert(acc->pa_peer == NULL);

    // Defer computation until the client performs connection.  If it succeeds,
    // give up the prepare; otherwise, reprepare.
    k = continuation_new(continue_ack_redirect, acc->pa_paxid);
    ERR_RET(r, state.connect(acc->pa_desc, acc->pa_size, &k->pk_cb));
    return 0;
  }

  // If we have heard back from everyone but neither acks nor redirects have
  // reached a majority, discard this prepare and prepare again.
  if (pax->prep->pp_acks < majority() &&
      DEATH_ADJUSTED(pax->prep->pp_redirects) < majority() &&
      pax->prep->pp_acks + pax->prep->pp_redirects == pax->live_count) {
    g_free(pax->prep);
    pax->prep = NULL;
    return proposer_prepare(NULL);
  }

  return 0;
}
/**
 * paxos_learn - Do something useful with the value of a commit.
 *
 * Marks the instance as learned and then acts on its decree kind:
 *   - DEC_NULL: nothing further.
 *   - DEC_CHAT: invoke the client's chat callback with the request data and
 *     the sender's identity.
 *   - DEC_JOIN: add the new acceptor to the acceptor list (reusing a deferred
 *     entry if one exists), record its identity, welcome it if we are the
 *     proposer, and invoke the client's join callback.
 *   - DEC_PART / DEC_KILL: invoke the client's part callback, then either
 *     leave the protocol (if we are the one parted) or remove and destroy
 *     the acceptor, re-electing a proposer if necessary.
 *
 * Note that we cannot free up the instance or any request associated with
 * it until a sync.
 *
 * @inst: the committed instance.
 * @req:  the request associated with the instance; dereferenced only for
 *        DEC_CHAT and DEC_JOIN.
 *
 * Returns 0 in the common case, the result of paxos_end() if we ourselves
 * were parted, or the result of proposer_prepare() if we became the proposer
 * as a consequence of the part.
 */
int paxos_learn(struct paxos_instance *inst, struct paxos_request *req) {
  int r = 0;
  struct paxos_acceptor *acc;

  // Mark the learn.
  inst->pi_learned = true;

  // Act on the decree (e.g., display chat, record acceptor list changes).
  switch (inst->pi_val.pv_dkind) {
    case DEC_NULL:
      break;

    case DEC_CHAT:
      // Grab the message sender.
      acc = acceptor_find(&pax->alist, req->pr_val.pv_reqid.id);
      assert(acc != NULL);

      // Invoke client learning callback.
      state.learn.chat(req->pr_data, req->pr_size, acc->pa_desc, acc->pa_size,
          pax->client_data);
      break;

    case DEC_JOIN:
      // Check the adefer list to see if we received a hello already for the
      // newly joined acceptor.
      acc = acceptor_find(&pax->adefer, inst->pi_hdr.ph_inum);

      if (acc != NULL) {
        // We found a deferred hello.  To complete the hello, just move our
        // acceptor over to the alist and increment the live count.
        LIST_REMOVE(&pax->adefer, acc, pa_le);
        pax->live_count++;
      } else {
        // We have not yet gotten the hello, so create a new acceptor whose
        // ID is the instance number of the join.
        acc = g_malloc0(sizeof(*acc));
        acc->pa_paxid = inst->pi_hdr.ph_inum;
      }
      acceptor_insert(&pax->alist, acc);

      // Copy over the identity information; the acceptor gets its own copy
      // of the request data.
      acc->pa_size = req->pr_size;
      acc->pa_desc = g_memdup(req->pr_data, req->pr_size);

      // If we are the proposer, we are responsible for connecting to the new
      // acceptor, as well as for sending the new acceptor its paxid and other
      // initial data.
      if (is_proposer()) {
        proposer_welcome(acc);
      }

      // Invoke client learning callback.
      state.learn.join(req->pr_data, req->pr_size, acc->pa_desc, acc->pa_size,
          pax->client_data);
      break;

    case DEC_PART:
    case DEC_KILL:
      // Grab the acceptor from the alist.
      acc = acceptor_find(&pax->alist, inst->pi_val.pv_extra);
      if (acc == NULL) {
        // It is possible that we may part twice; for instance, if a proposer
        // issues a part for itself but its departure from the system is
        // detected by acceptors before the part commit is received.  In this
        // case, just do nothing.
        break;
      }

      // Invoke client learning callback.  The acceptor's own descriptor is
      // passed for both identity arguments.
      state.learn.part(acc->pa_desc, acc->pa_size, acc->pa_desc, acc->pa_size,
          pax->client_data);

      // If we are being parted, leave the protocol.
      if (acc->pa_paxid == pax->self_id) {
        return paxos_end(pax);
      }

      // Take the parted acceptor off the list and do accounting if it was
      // still live.
      LIST_REMOVE(&pax->alist, acc, pa_le);
      if (acc->pa_peer != NULL) {
        pax->live_count--;
      }

      // If we just parted our proposer, "elect" a new one.  If it's us, send
      // a prepare.  This must happen before acc is destroyed, since acc is
      // passed to proposer_prepare().
      if (acc->pa_paxid == pax->proposer->pa_paxid) {
        reset_proposer();
        if (is_proposer()) {
          r = proposer_prepare(acc);
        }
      }

      // Free the parted acceptor.
      acceptor_destroy(acc);
      break;
  }

  return r;
}