int Learner :: OnSendCheckpoint_End(const CheckpointMsg & oCheckpointMsg) { if (!m_oCheckpointReceiver.IsReceiverFinish(oCheckpointMsg.nodeid(), oCheckpointMsg.uuid(), oCheckpointMsg.sequence())) { PLGErr("receive end msg but receiver not finish"); return -1; } BP->GetCheckpointBP()->ReceiveCheckpointDone(); std::vector<StateMachine *> vecSMList = m_poSMFac->GetSMList(); for (auto & poSM : vecSMList) { if (poSM->SMID() == SYSTEM_V_SMID || poSM->SMID() == MASTER_V_SMID) { //system variables sm no checkpoint //master variables sm no checkpoint continue; } string sTmpDirPath = m_oCheckpointReceiver.GetTmpDirPath(poSM->SMID()); std::vector<std::string> vecFilePathList; int ret = FileUtils :: IterDir(sTmpDirPath, vecFilePathList); if (ret != 0) { PLGErr("IterDir fail, dirpath %s", sTmpDirPath.c_str()); } if (vecFilePathList.size() == 0) { PLGImp("this sm %d have no checkpoint", poSM->SMID()); continue; } ret = poSM->LoadCheckpointState( m_poConfig->GetMyGroupIdx(), sTmpDirPath, vecFilePathList, oCheckpointMsg.checkpointinstanceid()); if (ret != 0) { BP->GetCheckpointBP()->ReceiveCheckpointAndLoadFail(); return ret; } } BP->GetCheckpointBP()->ReceiveCheckpointAndLoadSucc(); PLGImp("All sm load state ok, start to exit process"); exit(-1); return 0; }
void Learner :: OnSendNowInstanceID(const PaxosMsg & oPaxosMsg) { BP->GetLearnerBP()->OnSendNowInstanceID(); PLGHead("START Msg.InstanceID %lu Now.InstanceID %lu Msg.from_nodeid %lu Msg.MaxInstanceID %lu systemvariables_size %zu mastervariables_size %zu", oPaxosMsg.instanceid(), GetInstanceID(), oPaxosMsg.nodeid(), oPaxosMsg.nowinstanceid(), oPaxosMsg.systemvariables().size(), oPaxosMsg.mastervariables().size()); SetSeenInstanceID(oPaxosMsg.nowinstanceid(), oPaxosMsg.nodeid()); bool bSystemVariablesChange = false; int ret = m_poConfig->GetSystemVSM()->UpdateByCheckpoint(oPaxosMsg.systemvariables(), bSystemVariablesChange); if (ret == 0 && bSystemVariablesChange) { PLGHead("SystemVariables changed!, all thing need to reflesh, so skip this msg"); return; } bool bMasterVariablesChange = false; if (m_poConfig->GetMasterSM() != nullptr) { ret = m_poConfig->GetMasterSM()->UpdateByCheckpoint(oPaxosMsg.mastervariables(), bMasterVariablesChange); if (ret == 0 && bMasterVariablesChange) { PLGHead("MasterVariables changed!"); } } if (oPaxosMsg.instanceid() != GetInstanceID()) { PLGErr("Lag msg, skip"); return; } if (oPaxosMsg.nowinstanceid() <= GetInstanceID()) { PLGErr("Lag msg, skip"); return; } if (oPaxosMsg.minchoseninstanceid() > GetInstanceID()) { BP->GetCheckpointBP()->NeedAskforCheckpoint(); PLGHead("my instanceid %lu small than other's minchoseninstanceid %lu, other nodeid %lu", GetInstanceID(), oPaxosMsg.minchoseninstanceid(), oPaxosMsg.nodeid()); AskforCheckpoint(oPaxosMsg.nodeid()); } else if (!m_bIsIMLearning) { ComfirmAskForLearn(oPaxosMsg.nodeid()); } }
// Enqueues a copy of the given message bytes onto the IO loop's message queue.
//
// The queue lock is held for the whole operation. The message is rejected
// with -2 when the queue already holds more than QUEUE_MAXLENGTH entries or
// when the accounted queue memory exceeds MAX_QUEUE_MEM_SIZE. On success the
// queue receives a heap-allocated std::string copy of the message,
// m_iQueueMemSize grows by iMessageLen and 0 is returned.
int IOLoop :: AddMessage(const char * pcMessage, const int iMessageLen)
{
    int iResult = 0;

    m_oMessageQueue.lock();

    BP->GetIOLoopBP()->EnqueueMsg();

    if (static_cast<int>(m_oMessageQueue.size()) > QUEUE_MAXLENGTH)
    {
        BP->GetIOLoopBP()->EnqueueMsgRejectByFullQueue();
        PLGErr("Queue full, skip msg");
        iResult = -2;
    }
    else if (m_iQueueMemSize > MAX_QUEUE_MEM_SIZE)
    {
        PLErr("queue memsize %d too large, can't enqueue", m_iQueueMemSize);
        iResult = -2;
    }
    else
    {
        m_oMessageQueue.add(new std::string(pcMessage, iMessageLen));
        m_iQueueMemSize += iMessageLen;
    }

    // Single unlock point for every outcome above.
    m_oMessageQueue.unlock();

    return iResult;
}
void Base :: PackBaseMsg(const std::string & sBodyBuffer, const int iCmd, std::string & sBuffer) { char sGroupIdx[GROUPIDXLEN] = {0}; int iGroupIdx = m_poConfig->GetMyGroupIdx(); memcpy(sGroupIdx, &iGroupIdx, sizeof(sGroupIdx)); Header oHeader; oHeader.set_gid(m_poConfig->GetGid()); oHeader.set_rid(0); oHeader.set_cmdid(iCmd); oHeader.set_version(1); std::string sHeaderBuffer; bool bSucc = oHeader.SerializeToString(&sHeaderBuffer); if (!bSucc) { PLGErr("Header.SerializeToString fail, skip this msg"); assert(bSucc == true); } char sHeaderLen[HEADLEN_LEN] = {0}; uint16_t iHeaderLen = (uint16_t)sHeaderBuffer.size(); memcpy(sHeaderLen, &iHeaderLen, sizeof(sHeaderLen)); sBuffer = string(sGroupIdx, sizeof(sGroupIdx)) + string(sHeaderLen, sizeof(sHeaderLen)) + sHeaderBuffer + sBodyBuffer; //check sum uint32_t iBufferChecksum = crc32(0, (const uint8_t *)sBuffer.data(), sBuffer.size(), NET_CRC32SKIP); char sBufferChecksum[CHECKSUM_LEN] = {0}; memcpy(sBufferChecksum, &iBufferChecksum, sizeof(sBufferChecksum)); sBuffer += string(sBufferChecksum, sizeof(sBufferChecksum)); }
bool Cleaner :: DeleteOne(const uint64_t llInstanceID) { WriteOptions oWriteOptions; oWriteOptions.bSync = false; int ret = m_poLogStorage->Del(oWriteOptions, m_poConfig->GetMyGroupIdx(), llInstanceID); if (ret != 0) { return false; } m_poCheckpointMgr->SetMinChosenInstanceIDCache(llInstanceID); if (llInstanceID >= m_llLastSave + DELETE_SAVE_INTERVAL) { int ret = m_poCheckpointMgr->SetMinChosenInstanceID(llInstanceID + 1); if (ret != 0) { PLGErr("SetMinChosenInstanceID fail, now delete instanceid %lu", llInstanceID); return false; } m_llLastSave = llInstanceID; } return true; }
void Cleaner :: run() { m_bIsStart = true; Continue(); uint64_t llInstanceID = m_poCheckpointMgr->GetMinChosenInstanceID(); while (true) { if (m_bIsEnd) { PLGHead("Checkpoint.Cleaner [END]"); return; } if (!m_bCanrun) { PLGImp("Pausing, sleep"); m_bIsPaused = true; Time::MsSleep(1000); continue; } uint64_t llCPInstanceID = m_poSMFac->GetCheckpointInstanceID(m_poConfig->GetMyGroupIdx()) + 1; while (llInstanceID + m_llHoldCount < llCPInstanceID) { bool bDeleteRet = DeleteOne(llInstanceID); if (bDeleteRet) { PLGImp("delete one done, instanceid %lu", llInstanceID); llInstanceID++; } else { PLGErr("delete system fail, instanceid %lu", llInstanceID); break; } } if (llCPInstanceID == 0) { PLGImp("sleep a while, max deleted instanceid %lu checkpoint instanceid (no checkpoint) now instanceid %lu", llInstanceID, m_poCheckpointMgr->GetMaxChosenInstanceID()); } else { PLGImp("sleep a while, max deleted instanceid %lu checkpoint instanceid %lu now instanceid %lu", llInstanceID, llCPInstanceID, m_poCheckpointMgr->GetMaxChosenInstanceID()); } Time::MsSleep(1000); } }
// Ensures that sDirPath exists.
//
// If access(F_OK) reports that the path is present, nothing is done.
// Otherwise the directory is created with mode rwxrwxr-x.
// Returns 0 on success (or when the path already exists), -1 when mkdir fails.
int CheckpointReceiver :: CreateDir(const std::string & sDirPath)
{
    const char * pcDirPath = sDirPath.c_str();

    if (access(pcDirPath, F_OK) != -1)
    {
        // Path already present.
        return 0;
    }

    if (mkdir(pcDirPath, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH) == -1)
    {
        PLGErr("Create dir fail, path %s", sDirPath.c_str());
        return -1;
    }

    return 0;
}
void Learner :: OnAskforCheckpoint(const PaxosMsg & oPaxosMsg) { CheckpointSender * poCheckpointSender = GetNewCheckpointSender(oPaxosMsg.nodeid()); if (poCheckpointSender != nullptr) { poCheckpointSender->start(); PLGHead("new checkpoint sender started, send to nodeid %lu", oPaxosMsg.nodeid()); } else { PLGErr("Checkpoint Sender is running"); } }
void Learner :: OnAskforLearn(const PaxosMsg & oPaxosMsg) { BP->GetLearnerBP()->OnAskforLearn(); PLGHead("START Msg.InstanceID %lu Now.InstanceID %lu Msg.from_nodeid %lu MinChosenInstanceID %lu", oPaxosMsg.instanceid(), GetInstanceID(), oPaxosMsg.nodeid(), m_poCheckpointMgr->GetMinChosenInstanceID()); SetSeenInstanceID(oPaxosMsg.instanceid(), oPaxosMsg.nodeid()); if (oPaxosMsg.proposalnodeid() == m_poConfig->GetMyNodeID()) { //Found a node follow me. PLImp("Found a node %lu follow me.", oPaxosMsg.nodeid()); m_poConfig->AddFollowerNode(oPaxosMsg.nodeid()); } if (oPaxosMsg.instanceid() >= GetInstanceID()) { return; } if (oPaxosMsg.instanceid() >= m_poCheckpointMgr->GetMinChosenInstanceID()) { if (!m_oLearnerSender.Prepare(oPaxosMsg.instanceid(), oPaxosMsg.nodeid())) { BP->GetLearnerBP()->OnAskforLearnGetLockFail(); PLGErr("LearnerSender working for others."); if (oPaxosMsg.instanceid() == (GetInstanceID() - 1)) { PLGImp("InstanceID only difference one, just send this value to other."); //send one value AcceptorStateData oState; int ret = m_oPaxosLog.ReadState(m_poConfig->GetMyGroupIdx(), oPaxosMsg.instanceid(), oState); if (ret == 0) { BallotNumber oBallot(oState.acceptedid(), oState.acceptednodeid()); SendLearnValue(oPaxosMsg.nodeid(), oPaxosMsg.instanceid(), oBallot, oState.acceptedvalue(), 0); } } return; } } SendNowInstanceID(oPaxosMsg.instanceid(), oPaxosMsg.nodeid()); }
// Accept-phase timeout callback.
//
// The timeout is ignored when it was armed for an instance other than the
// current one. Otherwise a new Prepare round is started, passing along
// m_bWasRejectBySomeone.
void Proposer :: OnAcceptTimeout()
{
    PLGHead("OK");

    const bool bTimeoutIsStale = (GetInstanceID() != m_llTimeoutInstanceID);
    if (bTimeoutIsStale)
    {
        PLGErr("TimeoutInstanceID %lu not same to NowInstanceID %lu, skip",
                m_llTimeoutInstanceID, GetInstanceID());
        return;
    }

    BP->GetProposerBP()->AcceptTimeout();

    Prepare(m_bWasRejectBySomeone);
}
// Serializes a CheckpointMsg and frames it with PackBaseMsg under the
// MsgCmd_CheckpointMsg command id.
// Returns 0 on success, -1 if the message fails to serialize (sBuffer is then
// left untouched).
int Base :: PackCheckpointMsg(const CheckpointMsg & oCheckpointMsg, std::string & sBuffer)
{
    std::string sSerializedBody;
    if (!oCheckpointMsg.SerializeToString(&sSerializedBody))
    {
        PLGErr("CheckpointMsg.SerializeToString fail, skip this msg");
        return -1;
    }

    const int iCmd = MsgCmd_CheckpointMsg;
    PackBaseMsg(sSerializedBody, iCmd, sBuffer);

    return 0;
}
// Handles a peer's confirmation of an ask-for-learn: forwards the
// (instance id, node id) pair to the learner sender's Comfirm and logs the
// outcome.
void Learner :: OnComfirmAskForLearn(const PaxosMsg & oPaxosMsg)
{
    BP->GetLearnerBP()->OnComfirmAskForLearn();

    PLGHead("START Msg.InstanceID %lu Msg.from_nodeid %lu", oPaxosMsg.instanceid(), oPaxosMsg.nodeid());

    const bool bConfirmed = m_oLearnerSender.Comfirm(oPaxosMsg.instanceid(), oPaxosMsg.nodeid());
    if (bConfirmed)
    {
        PLGImp("OK, success comfirm");
        return;
    }

    BP->GetLearnerBP()->OnComfirmAskForLearnGetLockFail();

    PLGErr("LearnerSender comfirm fail, maybe is lag msg");
}
// Replays a single instance into the checkpoint state machines.
//
// Reads the stored acceptor state for llInstanceID and passes its accepted
// value to ExecuteForCheckpoint. Returns false when the state cannot be read
// (not logged here) or when execution fails (logged); true otherwise.
bool Replayer :: PlayOne(const uint64_t llInstanceID)
{
    AcceptorStateData oState;
    if (m_oPaxosLog.ReadState(m_poConfig->GetMyGroupIdx(), llInstanceID, oState) != 0)
    {
        return false;
    }

    const bool bExecuted = m_poSMFac->ExecuteForCheckpoint(
            m_poConfig->GetMyGroupIdx(), llInstanceID, oState.acceptedvalue());
    if (bExecuted)
    {
        return true;
    }

    PLGErr("Checkpoint sm excute fail, instanceid %lu", llInstanceID);
    return false;
}
// Handles the BEGIN message of a checkpoint transfer.
//
// Sets up a new receiver for the (sender node id, uuid) pair and, if that
// succeeds, stores the message's checkpoint instance id as the min-chosen
// instance id. Returns 0 on success, otherwise the non-zero code of whichever
// step failed.
int Learner :: OnSendCheckpoint_Begin(const CheckpointMsg & oCheckpointMsg)
{
    int ret = m_oCheckpointReceiver.NewReceiver(oCheckpointMsg.nodeid(), oCheckpointMsg.uuid());
    if (ret != 0)
    {
        return ret;
    }

    PLGImp("NewReceiver ok");

    ret = m_poCheckpointMgr->SetMinChosenInstanceID(oCheckpointMsg.checkpointinstanceid());
    if (ret != 0)
    {
        PLGErr("SetMinChosenInstanceID fail, ret %d CheckpointInstanceID %lu",
                ret, oCheckpointMsg.checkpointinstanceid());
    }

    return ret;
}
void Replayer :: run() { PLGHead("Checkpoint.Replayer [START]"); uint64_t llInstanceID = m_poSMFac->GetCheckpointInstanceID(m_poConfig->GetMyGroupIdx()) + 1; while (true) { if (m_bIsEnd) { PLGHead("Checkpoint.Replayer [END]"); return; } if (!m_bCanrun) { //PLGImp("Pausing, sleep"); m_bIsPaused = true; Time::MsSleep(1000); continue; } if (llInstanceID >= m_poCheckpointMgr->GetMaxChosenInstanceID()) { //PLGImp("now maxchosen instanceid %lu small than excute instanceid %lu, wait", //m_poCheckpointMgr->GetMaxChosenInstanceID(), llInstanceID); Time::MsSleep(1000); continue; } bool bPlayRet = PlayOne(llInstanceID); if (bPlayRet) { PLGImp("Play one done, instanceid %lu", llInstanceID); llInstanceID++; } else { PLGErr("Play one fail, instanceid %lu", llInstanceID); Time::MsSleep(500); } } }
void Learner :: OnSendLearnValue(const PaxosMsg & oPaxosMsg) { BP->GetLearnerBP()->OnSendLearnValue(); PLGHead("START Msg.InstanceID %lu Now.InstanceID %lu Msg.ballot_proposalid %lu Msg.ballot_nodeid %lu Msg.ValueSize %zu", oPaxosMsg.instanceid(), GetInstanceID(), oPaxosMsg.proposalid(), oPaxosMsg.nodeid(), oPaxosMsg.value().size()); if (oPaxosMsg.instanceid() > GetInstanceID()) { PLGDebug("[Latest Msg] i can't learn"); return; } else if (oPaxosMsg.instanceid() < GetInstanceID()) { PLGDebug("[Lag Msg] no need to learn"); return; } //learn value BallotNumber oBallot(oPaxosMsg.proposalid(), oPaxosMsg.proposalnodeid()); int ret = m_oLearnerState.LearnValue(oPaxosMsg.instanceid(), oBallot, oPaxosMsg.value(), GetLastChecksum()); if (ret != 0) { PLGErr("LearnState.LearnValue fail, ret %d", ret); return; } PLGHead("END LearnValue OK, proposalid %lu proposalid_nodeid %lu valueLen %zu", oPaxosMsg.proposalid(), oPaxosMsg.nodeid(), oPaxosMsg.value().size()); if (oPaxosMsg.flag() == PaxosMsgFlagType_SendLearnValue_NeedAck) { //every time' when receive valid need ack learn value, reset noop timeout. Reset_AskforLearn_Noop(); SendLearnValue_Ack(oPaxosMsg.nodeid()); } }
int LearnerState :: LearnValue(const uint64_t llInstanceID, const BallotNumber & oLearnedBallot, const std::string & sValue, const uint32_t iLastChecksum) { if (llInstanceID > 0 && iLastChecksum == 0) { m_iNewChecksum = 0; } else if (sValue.size() > 0) { m_iNewChecksum = crc32(iLastChecksum, (const uint8_t *)sValue.data(), sValue.size(), CRC32SKIP); } AcceptorStateData oState; oState.set_instanceid(llInstanceID); oState.set_acceptedvalue(sValue); oState.set_promiseid(oLearnedBallot.m_llProposalID); oState.set_promisenodeid(oLearnedBallot.m_llNodeID); oState.set_acceptedid(oLearnedBallot.m_llProposalID); oState.set_acceptednodeid(oLearnedBallot.m_llNodeID); oState.set_checksum(m_iNewChecksum); WriteOptions oWriteOptions; oWriteOptions.bSync = false; int ret = m_oPaxosLog.WriteState(oWriteOptions, m_poConfig->GetMyGroupIdx(), llInstanceID, oState); if (ret != 0) { PLGErr("LogStorage.WriteLog fail, InstanceID %lu ValueLen %zu ret %d", llInstanceID, sValue.size(), ret); return ret; } LearnValueWithoutWrite(llInstanceID, sValue, m_iNewChecksum); PLGDebug("OK, InstanceID %lu ValueLen %zu checksum %u", llInstanceID, sValue.size(), m_iNewChecksum); return 0; }
void Learner :: OnSendCheckpoint(const CheckpointMsg & oCheckpointMsg) { PLGHead("START uuid %lu flag %d sequence %lu cpi %lu checksum %u smid %d offset %lu buffsize %zu filepath %s", oCheckpointMsg.uuid(), oCheckpointMsg.flag(), oCheckpointMsg.sequence(), oCheckpointMsg.checkpointinstanceid(), oCheckpointMsg.checksum(), oCheckpointMsg.smid(), oCheckpointMsg.offset(), oCheckpointMsg.buffer().size(), oCheckpointMsg.filepath().c_str()); int ret = 0; if (oCheckpointMsg.flag() == CheckpointSendFileFlag_BEGIN) { ret = OnSendCheckpoint_Begin(oCheckpointMsg); } else if (oCheckpointMsg.flag() == CheckpointSendFileFlag_ING) { ret = OnSendCheckpoint_Ing(oCheckpointMsg); } else if (oCheckpointMsg.flag() == CheckpointSendFileFlag_END) { ret = OnSendCheckpoint_End(oCheckpointMsg); } if (ret != 0) { PLGErr("[FAIL] reset checkpoint receiver and reset askforlearn"); m_oCheckpointReceiver.Reset(); Reset_AskforLearn_Noop(5000); SendCheckpointAck(oCheckpointMsg.nodeid(), oCheckpointMsg.uuid(), oCheckpointMsg.sequence(), CheckpointSendFileAckFlag_Fail); } else { SendCheckpointAck(oCheckpointMsg.nodeid(), oCheckpointMsg.uuid(), oCheckpointMsg.sequence(), CheckpointSendFileAckFlag_OK); Reset_AskforLearn_Noop(120000); } }
// Prepares the receiver for a new checkpoint transfer from iSenderNodeID.
//
// Clears the temporary checkpoint files and all log data of this group, then
// resets the receiver state: initialised-directory map emptied, sender node id
// and uuid recorded, sequence set to 0.
// Returns 0 on success, or the non-zero code of the clearing step that failed
// (the receiver state is not modified in that case).
int CheckpointReceiver :: NewReceiver(const nodeid_t iSenderNodeID, const uint64_t llUUID)
{
    const int iClearTmpRet = ClearCheckpointTmp();
    if (iClearTmpRet != 0)
    {
        return iClearTmpRet;
    }

    const int iClearLogRet = m_poLogStorage->ClearAllLog(m_poConfig->GetMyGroupIdx());
    if (iClearLogRet != 0)
    {
        PLGErr("ClearAllLog fail, groupidx %d ret %d", m_poConfig->GetMyGroupIdx(), iClearLogRet);
        return iClearLogRet;
    }

    m_mapHasInitDir.clear();

    m_iSenderNodeID = iSenderNodeID;
    m_llUUID = llUUID;
    m_llSequence = 0;

    return 0;
}
int CheckpointReceiver :: ReceiveCheckpoint(const CheckpointMsg & oCheckpointMsg) { if (oCheckpointMsg.nodeid() != m_iSenderNodeID || oCheckpointMsg.uuid() != m_llUUID) { PLGErr("msg not valid, Msg.SenderNodeID %lu Receiver.SenderNodeID %lu Msg.UUID %lu Receiver.UUID %lu", oCheckpointMsg.nodeid(), m_iSenderNodeID, oCheckpointMsg.uuid(), m_llUUID); return -2; } if (oCheckpointMsg.sequence() == m_llSequence) { PLGErr("msg already receive, skip, Msg.Sequence %lu Receiver.Sequence %lu", oCheckpointMsg.sequence(), m_llSequence); return 0; } if (oCheckpointMsg.sequence() != m_llSequence + 1) { PLGErr("msg sequence wrong, Msg.Sequence %lu Receiver.Sequence %lu", oCheckpointMsg.sequence(), m_llSequence); return -2; } string sFilePath = GetTmpDirPath(oCheckpointMsg.smid()) + "/" + oCheckpointMsg.filepath(); string sFormatFilePath; int ret = InitFilePath(sFilePath, sFormatFilePath); if (ret != 0) { return -1; } int iFd = open(sFormatFilePath.c_str(), O_CREAT | O_RDWR | O_APPEND, S_IWRITE | S_IREAD); if (iFd == -1) { PLGErr("open file fail, filepath %s", sFormatFilePath.c_str()); return -1; } size_t llFileOffset = lseek(iFd, 0, SEEK_END); if ((uint64_t)llFileOffset != oCheckpointMsg.offset()) { PLGErr("file.offset %zu not equal to msg.offset %lu", llFileOffset, oCheckpointMsg.offset()); close(iFd); return -2; } size_t iWriteLen = write(iFd, oCheckpointMsg.buffer().data(), oCheckpointMsg.buffer().size()); if (iWriteLen != oCheckpointMsg.buffer().size()) { PLGImp("write fail, writelen %zu buffer size %zu", iWriteLen, oCheckpointMsg.buffer().size()); close(iFd); return -1; } m_llSequence++; close(iFd); PLGImp("END ok, writelen %zu", iWriteLen); return 0; }