int Learner :: OnSendCheckpoint_End(const CheckpointMsg & oCheckpointMsg) { if (!m_oCheckpointReceiver.IsReceiverFinish(oCheckpointMsg.nodeid(), oCheckpointMsg.uuid(), oCheckpointMsg.sequence())) { PLGErr("receive end msg but receiver not finish"); return -1; } BP->GetCheckpointBP()->ReceiveCheckpointDone(); std::vector<StateMachine *> vecSMList = m_poSMFac->GetSMList(); for (auto & poSM : vecSMList) { if (poSM->SMID() == SYSTEM_V_SMID || poSM->SMID() == MASTER_V_SMID) { //system variables sm no checkpoint //master variables sm no checkpoint continue; } string sTmpDirPath = m_oCheckpointReceiver.GetTmpDirPath(poSM->SMID()); std::vector<std::string> vecFilePathList; int ret = FileUtils :: IterDir(sTmpDirPath, vecFilePathList); if (ret != 0) { PLGErr("IterDir fail, dirpath %s", sTmpDirPath.c_str()); } if (vecFilePathList.size() == 0) { PLGImp("this sm %d have no checkpoint", poSM->SMID()); continue; } ret = poSM->LoadCheckpointState( m_poConfig->GetMyGroupIdx(), sTmpDirPath, vecFilePathList, oCheckpointMsg.checkpointinstanceid()); if (ret != 0) { BP->GetCheckpointBP()->ReceiveCheckpointAndLoadFail(); return ret; } } BP->GetCheckpointBP()->ReceiveCheckpointAndLoadSucc(); PLGImp("All sm load state ok, start to exit process"); exit(-1); return 0; }
// Handles an acknowledgement for a checkpoint message this node sent.
//
// Nothing happens unless a checkpoint sender exists and has not ended. An OK
// flag is forwarded to the sender as an Ack for (nodeid, uuid, sequence); any
// other flag ends the sender.
void Learner :: OnSendCheckpointAck(const CheckpointMsg & oCheckpointMsg)
{
    PLGHead("START flag %d", oCheckpointMsg.flag());

    // No active sender: there is nothing to acknowledge.
    if (m_poCheckpointSender == nullptr || m_poCheckpointSender->IsEnd())
    {
        return;
    }

    if (oCheckpointMsg.flag() != CheckpointSendFileAckFlag_OK)
    {
        m_poCheckpointSender->End();
        return;
    }

    m_poCheckpointSender->Ack(
            oCheckpointMsg.nodeid(),
            oCheckpointMsg.uuid(),
            oCheckpointMsg.sequence());
}
// Handles the BEGIN message of a checkpoint transfer.
//
// Creates a new receiver for the sender's (nodeid, uuid) and, if that
// succeeds, sets the minimum chosen instance id to the message's checkpoint
// instance id.
//
// Returns 0 on success, otherwise the non-zero result of whichever of the two
// steps failed.
int Learner :: OnSendCheckpoint_Begin(const CheckpointMsg & oCheckpointMsg)
{
    const int iNewReceiverRet = m_oCheckpointReceiver.NewReceiver(
            oCheckpointMsg.nodeid(), oCheckpointMsg.uuid());
    if (iNewReceiverRet != 0)
    {
        return iNewReceiverRet;
    }

    PLGImp("NewReceiver ok");

    const int iSetMinRet = m_poCheckpointMgr->SetMinChosenInstanceID(
            oCheckpointMsg.checkpointinstanceid());
    if (iSetMinRet != 0)
    {
        PLGErr("SetMinChosenInstanceID fail, ret %d CheckpointInstanceID %lu",
                iSetMinRet, oCheckpointMsg.checkpointinstanceid());
    }

    return iSetMinRet;
}
// Serializes oCheckpointMsg and packs it into sBuffer through PackBaseMsg with
// the MsgCmd_CheckpointMsg command.
//
// Returns 0 on success, or -1 (leaving sBuffer untouched by this function) when
// serialization fails.
int Base :: PackCheckpointMsg(const CheckpointMsg & oCheckpointMsg, std::string & sBuffer)
{
    std::string sSerializedBody;
    if (!oCheckpointMsg.SerializeToString(&sSerializedBody))
    {
        PLGErr("CheckpointMsg.SerializeToString fail, skip this msg");
        return -1;
    }

    int iMsgCmd = MsgCmd_CheckpointMsg;
    PackBaseMsg(sSerializedBody, iMsgCmd, sBuffer);

    return 0;
}
int Learner :: SendCheckpointAck( const nodeid_t iSendNodeID, const uint64_t llUUID, const uint64_t llSequence, const int iFlag) { CheckpointMsg oCheckpointMsg; oCheckpointMsg.set_msgtype(CheckpointMsgType_SendFile_Ack); oCheckpointMsg.set_nodeid(m_poConfig->GetMyNodeID()); oCheckpointMsg.set_uuid(llUUID); oCheckpointMsg.set_sequence(llSequence); oCheckpointMsg.set_flag(iFlag); return SendMessage(iSendNodeID, oCheckpointMsg, Message_SendType_TCP); }
int Learner :: SendCheckpointEnd( const nodeid_t iSendNodeID, const uint64_t llUUID, const uint64_t llSequence, const uint64_t llCheckpointInstanceID) { CheckpointMsg oCheckpointMsg; oCheckpointMsg.set_msgtype(CheckpointMsgType_SendFile); oCheckpointMsg.set_nodeid(m_poConfig->GetMyNodeID()); oCheckpointMsg.set_flag(CheckpointSendFileFlag_END); oCheckpointMsg.set_uuid(llUUID); oCheckpointMsg.set_sequence(llSequence); oCheckpointMsg.set_checkpointinstanceid(llCheckpointInstanceID); PLGImp("END, SendNodeID %lu uuid %lu sequence %lu cpi %lu", iSendNodeID, llUUID, llSequence, llCheckpointInstanceID); return SendMessage(iSendNodeID, oCheckpointMsg, Message_SendType_TCP); }
int CheckpointReceiver :: ReceiveCheckpoint(const CheckpointMsg & oCheckpointMsg) { if (oCheckpointMsg.nodeid() != m_iSenderNodeID || oCheckpointMsg.uuid() != m_llUUID) { PLGErr("msg not valid, Msg.SenderNodeID %lu Receiver.SenderNodeID %lu Msg.UUID %lu Receiver.UUID %lu", oCheckpointMsg.nodeid(), m_iSenderNodeID, oCheckpointMsg.uuid(), m_llUUID); return -2; } if (oCheckpointMsg.sequence() == m_llSequence) { PLGErr("msg already receive, skip, Msg.Sequence %lu Receiver.Sequence %lu", oCheckpointMsg.sequence(), m_llSequence); return 0; } if (oCheckpointMsg.sequence() != m_llSequence + 1) { PLGErr("msg sequence wrong, Msg.Sequence %lu Receiver.Sequence %lu", oCheckpointMsg.sequence(), m_llSequence); return -2; } string sFilePath = GetTmpDirPath(oCheckpointMsg.smid()) + "/" + oCheckpointMsg.filepath(); string sFormatFilePath; int ret = InitFilePath(sFilePath, sFormatFilePath); if (ret != 0) { return -1; } int iFd = open(sFormatFilePath.c_str(), O_CREAT | O_RDWR | O_APPEND, S_IWRITE | S_IREAD); if (iFd == -1) { PLGErr("open file fail, filepath %s", sFormatFilePath.c_str()); return -1; } size_t llFileOffset = lseek(iFd, 0, SEEK_END); if ((uint64_t)llFileOffset != oCheckpointMsg.offset()) { PLGErr("file.offset %zu not equal to msg.offset %lu", llFileOffset, oCheckpointMsg.offset()); close(iFd); return -2; } size_t iWriteLen = write(iFd, oCheckpointMsg.buffer().data(), oCheckpointMsg.buffer().size()); if (iWriteLen != oCheckpointMsg.buffer().size()) { PLGImp("write fail, writelen %zu buffer size %zu", iWriteLen, oCheckpointMsg.buffer().size()); close(iFd); return -1; } m_llSequence++; close(iFd); PLGImp("END ok, writelen %zu", iWriteLen); return 0; }
void Learner :: OnSendCheckpoint(const CheckpointMsg & oCheckpointMsg) { PLGHead("START uuid %lu flag %d sequence %lu cpi %lu checksum %u smid %d offset %lu buffsize %zu filepath %s", oCheckpointMsg.uuid(), oCheckpointMsg.flag(), oCheckpointMsg.sequence(), oCheckpointMsg.checkpointinstanceid(), oCheckpointMsg.checksum(), oCheckpointMsg.smid(), oCheckpointMsg.offset(), oCheckpointMsg.buffer().size(), oCheckpointMsg.filepath().c_str()); int ret = 0; if (oCheckpointMsg.flag() == CheckpointSendFileFlag_BEGIN) { ret = OnSendCheckpoint_Begin(oCheckpointMsg); } else if (oCheckpointMsg.flag() == CheckpointSendFileFlag_ING) { ret = OnSendCheckpoint_Ing(oCheckpointMsg); } else if (oCheckpointMsg.flag() == CheckpointSendFileFlag_END) { ret = OnSendCheckpoint_End(oCheckpointMsg); } if (ret != 0) { PLGErr("[FAIL] reset checkpoint receiver and reset askforlearn"); m_oCheckpointReceiver.Reset(); Reset_AskforLearn_Noop(5000); SendCheckpointAck(oCheckpointMsg.nodeid(), oCheckpointMsg.uuid(), oCheckpointMsg.sequence(), CheckpointSendFileAckFlag_Fail); } else { SendCheckpointAck(oCheckpointMsg.nodeid(), oCheckpointMsg.uuid(), oCheckpointMsg.sequence(), CheckpointSendFileAckFlag_OK); Reset_AskforLearn_Noop(120000); } }
int Learner :: SendCheckpoint( const nodeid_t iSendNodeID, const uint64_t llUUID, const uint64_t llSequence, const uint64_t llCheckpointInstanceID, const uint32_t iChecksum, const std::string & sFilePath, const int iSMID, const uint64_t llOffset, const std::string & sBuffer) { CheckpointMsg oCheckpointMsg; oCheckpointMsg.set_msgtype(CheckpointMsgType_SendFile); oCheckpointMsg.set_nodeid(m_poConfig->GetMyNodeID()); oCheckpointMsg.set_flag(CheckpointSendFileFlag_ING); oCheckpointMsg.set_uuid(llUUID); oCheckpointMsg.set_sequence(llSequence); oCheckpointMsg.set_checkpointinstanceid(llCheckpointInstanceID); oCheckpointMsg.set_checksum(iChecksum); oCheckpointMsg.set_filepath(sFilePath); oCheckpointMsg.set_smid(iSMID); oCheckpointMsg.set_offset(llOffset); oCheckpointMsg.set_buffer(sBuffer); PLGImp("END, SendNodeID %lu uuid %lu sequence %lu cpi %lu checksum %u smid %d offset %lu buffsize %zu filepath %s", iSendNodeID, llUUID, llSequence, llCheckpointInstanceID, iChecksum, iSMID, llOffset, sBuffer.size(), sFilePath.c_str()); return SendMessage(iSendNodeID, oCheckpointMsg, Message_SendType_TCP); }