// Handles an incoming "learn chosen" Paxos message.
//
// The message is ignored while the database is committing, and also when it
// refers to a round older than the local paxosID. If it refers to a newer
// round, or if it is a PAXOS_LEARN_PROPOSAL that does not match the proposal
// this acceptor has accepted, the chosen value is requested from the sender
// via RequestChosen(). Otherwise the run ID is determined and handed to
// ProcessLearnChosen().
//
// imsg: the received message; reads paxosID, nodeID, type, runID, value and
//       proposalID.
void ReplicatedLog::OnLearnChosen(PaxosMessage& imsg)
{
#ifdef RLOG_DEBUG_MESSAGES
    Log_Debug("OnLearnChosen begin");
#endif

    if (context->GetDatabase()->IsCommiting())
    {
#ifdef RLOG_DEBUG_MESSAGES
        Log_Debug("Database is commiting, dropping Paxos message");
#endif
        return;
    }

    Log_Trace();

    // Only messages for the current round are processed here.
    if (imsg.paxosID != paxosID)
    {
        // I am lagging and need to catch-up; messages for older rounds
        // are silently dropped.
        if (imsg.paxosID > paxosID)
            RequestChosen(imsg.nodeID);
        return;
    }

    const bool carriesValue = (imsg.type == PAXOS_LEARN_VALUE);
    if (!carriesValue)
    {
        // A proposal-only learn message is usable only if this acceptor has
        // already accepted exactly that proposal.
        const bool matchesAccepted =
            imsg.type == PAXOS_LEARN_PROPOSAL &&
            acceptor.state.accepted &&
            acceptor.state.acceptedProposalID == imsg.proposalID;

        if (!matchesAccepted)
        {
            RequestChosen(imsg.nodeID);
            return;
        }
    }

    uint64_t learnedRunID;
    if (carriesValue)
    {
        // The message carries the value itself: adopt it as accepted.
        learnedRunID = imsg.runID;
        acceptor.state.accepted = true;
        acceptor.state.acceptedValue.Write(imsg.value);
    }
    else
    {
        learnedRunID = acceptor.state.acceptedRunID;
    }

    ProcessLearnChosen(imsg.nodeID, learnedRunID);

#ifdef RLOG_DEBUG_MESSAGES
    Log_Debug("OnLearnChosen end");
#endif
}
bool ReplicatedLog::OnLearnChosen(PaxosMessage& imsg) { uint64_t runID; #ifdef RLOG_DEBUG_MESSAGES Log_Debug("OnLearnChosen begin"); #endif if (context->GetDatabase()->IsCommitting()) { #ifdef RLOG_DEBUG_MESSAGES Log_Debug("Database is commiting, dropping Paxos message"); #endif return true; } if (waitingOnAppend) { #ifdef RLOG_DEBUG_MESSAGES Log_Debug("Waiting OnAppend, dropping Paxos message"); #endif return true; } // if I was the primary, lost the lease in the middle of round 1000, another node replicated some rounds 1001-1100, // I get the lease back, and then receive the learn message for my previous round (1000) // then multi will be turned back on, but then subsequent rounds (1001-1100), where I will // receive learn messages will not succeed, and due to the code above // I would throw it away any learn messages sent to me //if (imsg.nodeID != MY_NODEID && proposer.state.multi) //{ // Log_Debug("Received learn message from %U, but I'm in multi paxos mode", imsg.nodeID); // return true; //} // it's valid for me to be the primary and be lagging by one round * at the beginning of my lease * // this can happen if the old primary completes a round of replication, fails // and I get the lease before its OnLearnChosen arrives // if I throw away the OnLearnChosen, but the others in the quorum do not // they will advance their paxosID, I will not advance mine // and I will not be able to replicate => read-only cluster //if (imsg.nodeID != MY_NODEID && context->IsLeaseOwner()) //{ // Log_Debug("Received learn message from %U, but I'm the lease owner", imsg.nodeID); // return true; //} Log_Trace(); if (imsg.paxosID > paxosID) { RequestChosen(imsg.nodeID); // I am lagging and need to catch-up return true; } else if (imsg.paxosID < paxosID) return true; if (imsg.type == PAXOS_LEARN_VALUE) { runID = 0; // in the PAXOS_LEARN_VALUE case (and only in this case) runID is 0 // for legacy reasons the PAXOS_LEARN_VALUE message also includes a runID, // which is always set to 0 
acceptor.state.accepted = true; acceptor.state.acceptedValue.Write(imsg.value); acceptor.WriteState(); } else if (imsg.type == PAXOS_LEARN_PROPOSAL && acceptor.state.accepted && acceptor.state.acceptedProposalID == imsg.proposalID) { runID = acceptor.state.acceptedRunID; } else { RequestChosen(imsg.nodeID); return true; } ProcessLearnChosen(imsg.nodeID, runID); #ifdef RLOG_DEBUG_MESSAGES Log_Debug("OnLearnChosen end"); #endif return false; }