int PNode :: InitLogStorage(const Options & oOptions, LogStorage *& poLogStorage)
{
    if (oOptions.poLogStorage != nullptr)
    {
        poLogStorage = oOptions.poLogStorage;
        PLImp("OK, use user logstorage");
        return 0;
    }

    if (oOptions.sLogStoragePath.size() == 0)
    {
        PLErr("LogStorage Path is null");
        return -2;
    }

    int ret = m_oDefaultLogStorage.Init(oOptions.sLogStoragePath, oOptions.iGroupCount);
    if (ret != 0)
    {
        PLErr("Init default logstorage fail, logpath %s ret %d",
                oOptions.sLogStoragePath.c_str(), ret);
        return ret;
    }

    poLogStorage = &m_oDefaultLogStorage;
    PLImp("OK, use default logstorage");

    return 0;
}
void EventLoop :: RemoveEvent(const Event * poEvent)
{
    auto it = m_mapEvent.find(poEvent->GetSocketFd());
    if (it == end(m_mapEvent))
    {
        return;
    }

    int iEpollOperation = EPOLL_CTL_DEL;

    epoll_event tEpollEvent;
    tEpollEvent.events = 0;
    tEpollEvent.data.fd = poEvent->GetSocketFd();

    int ret = epoll_ctl(m_iEpollFd, iEpollOperation, poEvent->GetSocketFd(), &tEpollEvent);
    if (ret == -1)
    {
        PLErr("epoll_ctl fail, EpollFd %d EpollOperation %d SocketFd %d",
                m_iEpollFd, iEpollOperation, poEvent->GetSocketFd());

        //TODO: handle the epoll_ctl error instead of leaving the map entry behind.
        return;
    }

    m_mapEvent.erase(poEvent->GetSocketFd());
}
int IOLoop :: AddMessage(const char * pcMessage, const int iMessageLen)
{
    m_oMessageQueue.lock();

    BP->GetIOLoopBP()->EnqueueMsg();

    if ((int)m_oMessageQueue.size() > QUEUE_MAXLENGTH)
    {
        BP->GetIOLoopBP()->EnqueueMsgRejectByFullQueue();

        PLGErr("Queue full, skip msg");
        m_oMessageQueue.unlock();
        return -2;
    }

    if (m_iQueueMemSize > MAX_QUEUE_MEM_SIZE)
    {
        PLGErr("queue memsize %d too large, can't enqueue", m_iQueueMemSize);
        m_oMessageQueue.unlock();
        return -2;
    }

    m_oMessageQueue.add(new string(pcMessage, iMessageLen));
    m_iQueueMemSize += iMessageLen;

    m_oMessageQueue.unlock();

    return 0;
}
void EventLoop :: ModEvent(const Event * poEvent, const int iEvents)
{
    auto it = m_mapEvent.find(poEvent->GetSocketFd());
    int iEpollOperation = 0;
    if (it == end(m_mapEvent))
    {
        iEpollOperation = EPOLL_CTL_ADD;
    }
    else
    {
        iEpollOperation = it->second.m_iEvents ? EPOLL_CTL_MOD : EPOLL_CTL_ADD;
    }

    epoll_event tEpollEvent;
    tEpollEvent.events = iEvents;
    tEpollEvent.data.fd = poEvent->GetSocketFd();

    int ret = epoll_ctl(m_iEpollFd, iEpollOperation, poEvent->GetSocketFd(), &tEpollEvent);
    if (ret == -1)
    {
        PLErr("epoll_ctl fail, EpollFd %d EpollOperation %d SocketFd %d EpollEvent %d",
                m_iEpollFd, iEpollOperation, poEvent->GetSocketFd(), iEvents);

        //TODO: handle the epoll_ctl error.
        return;
    }

    EventCtx tCtx;
    tCtx.m_poEvent = (Event *)poEvent;
    tCtx.m_iEvents = iEvents;

    m_mapEvent[poEvent->GetSocketFd()] = tCtx;
}
void EventLoop :: OneLoop(const int iTimeoutMs)
{
    //note: epoll_wait blocks for at most 1ms here; iTimeoutMs is not used.
    int n = epoll_wait(m_iEpollFd, m_EpollEvents, MAX_EVENTS, 1);
    if (n == -1)
    {
        if (errno != EINTR)
        {
            PLErr("epoll_wait fail, errno %d", errno);
            return;
        }
    }

    for (int i = 0; i < n; i++)
    {
        int iFd = m_EpollEvents[i].data.fd;
        auto it = m_mapEvent.find(iFd);
        if (it == end(m_mapEvent))
        {
            continue;
        }

        int iEvents = m_EpollEvents[i].events;
        Event * poEvent = it->second.m_poEvent;

        int ret = 0;
        if (iEvents & EPOLLERR)
        {
            OnError(iEvents, poEvent);
            continue;
        }

        try
        {
            if (iEvents & EPOLLIN)
            {
                ret = poEvent->OnRead();
            }

            if (iEvents & EPOLLOUT)
            {
                ret = poEvent->OnWrite();
            }
        }
        catch (...)
        {
            ret = -1;
        }

        if (ret != 0)
        {
            OnError(iEvents, poEvent);
        }
    }
}
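//A minimal sketch of how OneLoop is typically driven. StartLoop, m_bIsEnd and
//DealwithTimeout are assumptions about the rest of EventLoop, shown here (as
//comments only) to illustrate where the timeout argument would come from:
//
//  void EventLoop :: StartLoop()
//  {
//      while (!m_bIsEnd)
//      {
//          int iNextTimeoutMs = 1000;
//          //fire expired timers and learn how long until the next one
//          DealwithTimeout(iNextTimeoutMs);
//          OneLoop(iNextTimeoutMs);
//      }
//  }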
int PNode :: OnReceiveMessage(const char * pcMessage, const int iMessageLen)
{
    if (pcMessage == nullptr || iMessageLen <= 0)
    {
        PLErr("Message size %d too small, not valid.", iMessageLen);
        return -2;
    }

    int iGroupIdx = -1;
    memcpy(&iGroupIdx, pcMessage, GROUPIDXLEN);

    if (!CheckGroupID(iGroupIdx))
    {
        PLErr("Message groupid %d wrong, groupsize %zu", iGroupIdx, m_vecGroupList.size());
        return Paxos_GroupIdxWrong;
    }

    return m_vecGroupList[iGroupIdx]->GetInstance()->OnReceiveMessage(pcMessage, iMessageLen);
}
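//Hedged sketch: OnReceiveMessage above reads the group index out of the first
//GROUPIDXLEN bytes of the message, so a sender has to prepend that header.
//BuildMessage is a hypothetical helper, not part of the PNode API, and it
//assumes GROUPIDXLEN == sizeof(int):
#include <string>

static std::string BuildMessage(const int iGroupIdx, const std::string & sBody)
{
    std::string sMessage;
    //group index header first, raw message body after it
    sMessage.append(reinterpret_cast<const char *>(&iGroupIdx), sizeof(int));
    sMessage.append(sBody);
    return sMessage;
}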
int TcpClient :: AddMessage(const std::string & sIP, const int iPort, const std::string & sMessage)
{
    MessageEvent * poEvent = GetEvent(sIP, iPort);
    if (poEvent == nullptr)
    {
        PLErr("no event created for this ip %s port %d", sIP.c_str(), iPort);
        return -1;
    }

    return poEvent->AddMessage(sMessage);
}
int Notify :: Init()
{
    int ret = pipe(m_iPipeFD);
    if (ret != 0)
    {
        PLErr("create pipe fail, ret %d", ret);
        return ret;
    }

    AddEvent(EPOLLIN);
    return 0;
}
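//Hedged sketch: Notify registers the read end of a pipe with epoll, i.e. the
//classic self-pipe trick for waking the event loop. SendNotify and OnRead are
//assumptions about the rest of the class, shown as comments to illustrate the
//pattern only:
//
//  void Notify :: SendNotify()
//  {
//      //a single byte is enough to make epoll report the fd readable
//      write(m_iPipeFD[1], (void *)"a", 1);
//  }
//
//  int Notify :: OnRead()
//  {
//      char sTmp[2] = {0};
//      read(m_iPipeFD[0], sTmp, 1);
//      return 0;
//  }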
int MultiDatabase :: Init(const std::string & sDBPath, const int iGroupCount)
{
    if (access(sDBPath.c_str(), F_OK) == -1)
    {
        PLErr("DBPath does not exist or no permission to open, %s", sDBPath.c_str());
        return -1;
    }

    if (iGroupCount < 1 || iGroupCount > 100000)
    {
        PLErr("Groupcount wrong %d", iGroupCount);
        return -2;
    }

    std::string sNewDBPath = sDBPath;
    if (sDBPath[sDBPath.size() - 1] != '/')
    {
        sNewDBPath += '/';
    }

    for (int iGroupIdx = 0; iGroupIdx < iGroupCount; iGroupIdx++)
    {
        //each group gets its own subdirectory: <dbpath>/g0, <dbpath>/g1, ...
        char sGroupDBPath[512] = {0};
        snprintf(sGroupDBPath, sizeof(sGroupDBPath), "%sg%d", sNewDBPath.c_str(), iGroupIdx);

        Database * poDB = new Database();
        assert(poDB != nullptr);
        m_vecDBList.push_back(poDB);

        if (poDB->Init(sGroupDBPath, iGroupIdx) != 0)
        {
            return -1;
        }
    }

    PLImp("OK, DBPath %s groupcount %d", sDBPath.c_str(), iGroupCount);

    return 0;
}
int PNode :: CheckOptions(const Options & oOptions)
{
    //init logger
    if (oOptions.pLogFunc != nullptr)
    {
        LOGGER->SetLogFunc(oOptions.pLogFunc);
    }
    else
    {
        LOGGER->InitLogger(oOptions.eLogLevel);
    }

    if (oOptions.poLogStorage == nullptr && oOptions.sLogStoragePath.size() == 0)
    {
        PLErr("no logpath and logstorage is null");
        return -2;
    }

    if (oOptions.iUDPMaxSize > 64 * 1024)
    {
        PLErr("udp max size %zu is too large", oOptions.iUDPMaxSize);
        return -2;
    }

    if (oOptions.iGroupCount > 200)
    {
        PLErr("group count %d is too large", oOptions.iGroupCount);
        return -2;
    }

    if (oOptions.iGroupCount <= 0)
    {
        PLErr("group count %d must be larger than zero", oOptions.iGroupCount);
        return -2;
    }

    for (auto & oFollowerNodeInfo : oOptions.vecFollowerNodeInfoList)
    {
        if (oFollowerNodeInfo.oMyNode.GetNodeID() == oFollowerNodeInfo.oFollowNode.GetNodeID())
        {
            PLErr("self node ip %s port %d equal to follow node",
                    oFollowerNodeInfo.oMyNode.GetIP().c_str(), oFollowerNodeInfo.oMyNode.GetPort());
            return -2;
        }
    }

    for (auto & oGroupSMInfo : oOptions.vecGroupSMInfoList)
    {
        if (oGroupSMInfo.iGroupIdx >= oOptions.iGroupCount)
        {
            PLErr("SM GroupIdx %d larger than or equal to GroupCount %d",
                    oGroupSMInfo.iGroupIdx, oOptions.iGroupCount);
            return -2;
        }
    }

    return 0;
}
bool SMFac :: ExecuteForCheckpoint(const int iGroupIdx, const uint64_t llInstanceID,
        const std::string & sPaxosValue)
{
    if (sPaxosValue.size() < sizeof(int))
    {
        PLErr("Value wrong, instanceid %lu size %zu", llInstanceID, sPaxosValue.size());
        //need do nothing, just skip
        return true;
    }

    int iSMID = 0;
    memcpy(&iSMID, sPaxosValue.data(), sizeof(int));

    if (iSMID == 0)
    {
        PLImp("Value no need to do sm, just skip, instanceid %lu", llInstanceID);
        return true;
    }

    if (m_vecSMList.size() == 0)
    {
        PLImp("No sm registered yet, need to wait, instanceid %lu", llInstanceID);
        return false;
    }

    for (auto & poSM : m_vecSMList)
    {
        if (poSM->SMID() == iSMID)
        {
            std::string sBodyValue = string(sPaxosValue.data() + sizeof(int),
                    sPaxosValue.size() - sizeof(int));
            return poSM->ExecuteForCheckpoint(iGroupIdx, llInstanceID, sBodyValue);
        }
    }

    PLErr("Unknown smid %d instanceid %lu", iSMID, llInstanceID);

    return false;
}
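//Hedged sketch: ExecuteForCheckpoint above expects the first sizeof(int)
//bytes of a paxos value to carry the SMID, with the state machine's own
//payload following it. PackPaxosValue is a hypothetical helper producing a
//value in that layout; it is not part of the SMFac API:
#include <string>

static std::string PackPaxosValue(const int iSMID, const std::string & sBodyValue)
{
    std::string sPaxosValue;
    //SMID header first (iSMID == 0 means "no state machine, skip"),
    //state-machine payload after it
    sPaxosValue.append(reinterpret_cast<const char *>(&iSMID), sizeof(int));
    sPaxosValue.append(sBodyValue);
    return sPaxosValue;
}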
void EventLoop :: OnError(const int iEvents, Event * poEvent)
{
    BP->GetNetworkBP()->TcpOnError();

    PLErr("event error, events %d socketfd %d socket ip %s errno %d",
            iEvents, poEvent->GetSocketFd(), poEvent->GetSocketHost().c_str(), errno);

    RemoveEvent(poEvent);

    bool bNeedDelete = false;
    poEvent->OnError(bNeedDelete);

    if (bNeedDelete)
    {
        poEvent->Destroy();
    }
}
int PhxKV :: RunPaxos()
{
    bool bSucc = m_oPhxKVSM.Init();
    if (!bSucc)
    {
        return -1;
    }

    Options oOptions;

    oOptions.sLogStoragePath = m_sPaxosLogPath;

    //iGroupCount is the number of paxos groups to run.
    //every paxos group is independent; no two paxos groups communicate with each other.
    oOptions.iGroupCount = m_iGroupCount;

    oOptions.oMyNode = m_oMyNode;
    oOptions.vecNodeInfoList = m_vecNodeList;

    //all groups share the same state machine (kv), so every group registers it.
    //keys are only split across paxos groups to improve performance.
    for (int iGroupIdx = 0; iGroupIdx < m_iGroupCount; iGroupIdx++)
    {
        GroupSMInfo oSMInfo;
        oSMInfo.iGroupIdx = iGroupIdx;
        oSMInfo.vecSMList.push_back(&m_oPhxKVSM);
        oSMInfo.bIsUseMaster = true;

        oOptions.vecGroupSMInfoList.push_back(oSMInfo);
    }

    //set logfunc
    oOptions.pLogFunc = LOGGER->GetLogFunc();

    int ret = Node::RunNode(oOptions, m_poPaxosNode);
    if (ret != 0)
    {
        PLErr("run paxos fail, ret %d", ret);
        return ret;
    }

    PLImp("run paxos ok");
    return 0;
}
int PhxKV :: KVPropose(const std::string & sKey, const std::string & sPaxosValue,
        PhxKVSMCtx & oPhxKVSMCtx)
{
    int iGroupIdx = GetGroupIdx(sKey);

    SMCtx oCtx;
    //smid must be the same as PhxKVSM.SMID().
    oCtx.m_iSMID = 1;
    oCtx.m_pCtx = (void *)&oPhxKVSMCtx;

    uint64_t llInstanceID = 0;
    int ret = m_poPaxosNode->Propose(iGroupIdx, sPaxosValue, llInstanceID, &oCtx);
    if (ret != 0)
    {
        PLErr("paxos propose fail, key %s groupidx %d ret %d", sKey.c_str(), iGroupIdx, ret);
        return ret;
    }

    return 0;
}
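//Hedged sketch: KVPropose routes each key to a paxos group via GetGroupIdx,
//whose implementation is not shown here. A plausible version hashes the key
//modulo the group count; GetGroupIdxSketch is a hypothetical stand-in, not
//the actual PhxKV method:
#include <functional>
#include <string>

static int GetGroupIdxSketch(const std::string & sKey, const int iGroupCount)
{
    //same key always lands in the same group, load spreads across groups
    return (int)(std::hash<std::string>()(sKey) % (size_t)iGroupCount);
}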
int EventLoop :: Init(const int iEpollLength)
{
    m_iEpollFd = epoll_create(iEpollLength);
    if (m_iEpollFd == -1)
    {
        PLErr("epoll_create fail, ret %d", m_iEpollFd);
        return -1;
    }

    m_poNotify = new Notify(this);
    assert(m_poNotify != nullptr);

    int ret = m_poNotify->Init();
    if (ret != 0)
    {
        return ret;
    }

    return 0;
}
const std::map<nodeid_t, uint64_t> & Config :: GetMyFollowerMap()
{
    uint64_t llNowTime = Time::GetSteadyClockMS();

    for (auto it = m_mapMyFollower.begin(); it != end(m_mapMyFollower);)
    {
        if (it->second < llNowTime)
        {
            PLErr("follower %lu timeout, nowtimems %lu follower last add time %lu",
                    it->first, llNowTime, it->second);
            it = m_mapMyFollower.erase(it);
        }
        else
        {
            it++;
        }
    }

    return m_mapMyFollower;
}
const std::map<nodeid_t, uint64_t> & Config :: GetTmpNodeMap()
{
    uint64_t llNowTime = Time::GetSteadyClockMS();

    for (auto it = m_mapTmpNodeOnlyForLearn.begin(); it != end(m_mapTmpNodeOnlyForLearn);)
    {
        if (it->second < llNowTime)
        {
            PLErr("tmpnode %lu timeout, nowtimems %lu tmpnode last add time %lu",
                    it->first, llNowTime, it->second);
            it = m_mapTmpNodeOnlyForLearn.erase(it);
        }
        else
        {
            it++;
        }
    }

    return m_mapTmpNodeOnlyForLearn;
}
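//Hedged sketch: GetMyFollowerMap and GetTmpNodeMap above share the same
//expire-and-erase pattern over a nodeid -> deadline map. A template helper
//like the one below (hypothetical, not in the source, and with the per-entry
//logging omitted) could deduplicate them:
#include <cstdint>
#include <map>

template <typename KeyT>
void EraseExpired(std::map<KeyT, uint64_t> & mapNodes, const uint64_t llNowTime)
{
    for (auto it = mapNodes.begin(); it != mapNodes.end();)
    {
        if (it->second < llNowTime)
        {
            //entry's deadline is in the past, drop it
            it = mapNodes.erase(it);
        }
        else
        {
            it++;
        }
    }
}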
int PNode :: InitNetWork(const Options & oOptions, NetWork *& poNetWork)
{
    if (oOptions.poNetWork != nullptr)
    {
        poNetWork = oOptions.poNetWork;
        PLImp("OK, use user network");
        return 0;
    }

    int ret = m_oDefaultNetWork.Init(oOptions.oMyNode.GetIP(), oOptions.oMyNode.GetPort());
    if (ret != 0)
    {
        PLErr("init default network fail, listenip %s listenport %d ret %d",
                oOptions.oMyNode.GetIP().c_str(), oOptions.oMyNode.GetPort(), ret);
        return ret;
    }

    poNetWork = &m_oDefaultNetWork;
    PLImp("OK, use default network");

    return 0;
}
int DFNetWork :: Init(const std::string & sListenIp, const int iListenPort)
{
    int ret = m_oUDPSend.Init();
    if (ret != 0)
    {
        return ret;
    }

    ret = m_oUDPRecv.Init(iListenPort);
    if (ret != 0)
    {
        return ret;
    }

    ret = m_oTcpIOThread.Init(sListenIp, iListenPort);
    if (ret != 0)
    {
        PLErr("m_oTcpIOThread Init fail, ret %d", ret);
        return ret;
    }

    return 0;
}
int StateMachine :: LockCheckpointState()
{
    PLErr("func not impl, return -1");
    return -1;
}
int PNode :: Init(const Options & oOptions, NetWork *& poNetWork)
{
    int ret = CheckOptions(oOptions);
    if (ret != 0)
    {
        PLErr("CheckOptions fail, ret %d", ret);
        return ret;
    }

    m_iMyNodeID = oOptions.oMyNode.GetNodeID();

    //step1 init logstorage
    LogStorage * poLogStorage = nullptr;
    ret = InitLogStorage(oOptions, poLogStorage);
    if (ret != 0)
    {
        return ret;
    }

    //step2 init network
    ret = InitNetWork(oOptions, poNetWork);
    if (ret != 0)
    {
        return ret;
    }

    //step3 build masterlist
    for (int iGroupIdx = 0; iGroupIdx < oOptions.iGroupCount; iGroupIdx++)
    {
        MasterMgr * poMaster = new MasterMgr(this, iGroupIdx, poLogStorage, oOptions.pMasterChangeCallback);
        assert(poMaster != nullptr);
        m_vecMasterList.push_back(poMaster);

        ret = poMaster->Init();
        if (ret != 0)
        {
            return ret;
        }
    }

    //step4 build grouplist
    for (int iGroupIdx = 0; iGroupIdx < oOptions.iGroupCount; iGroupIdx++)
    {
        Group * poGroup = new Group(poLogStorage, poNetWork,
                m_vecMasterList[iGroupIdx]->GetMasterSM(), iGroupIdx, oOptions);
        assert(poGroup != nullptr);
        m_vecGroupList.push_back(poGroup);
    }

    //step5 build batchpropose
    if (oOptions.bUseBatchPropose)
    {
        for (int iGroupIdx = 0; iGroupIdx < oOptions.iGroupCount; iGroupIdx++)
        {
            ProposeBatch * poProposeBatch = new ProposeBatch(iGroupIdx, this, &m_oNotifierPool);
            assert(poProposeBatch != nullptr);
            m_vecProposeBatch.push_back(poProposeBatch);
        }
    }

    //step6 init statemachine
    InitStateMachine(oOptions);

    //step7 parallel init group
    for (auto & poGroup : m_vecGroupList)
    {
        poGroup->StartInit();
    }

    for (auto & poGroup : m_vecGroupList)
    {
        int initret = poGroup->GetInitRet();
        if (initret != 0)
        {
            ret = initret;
        }
    }

    if (ret != 0)
    {
        return ret;
    }

    //last step: only start threads after every init has succeeded.
    //stopping threads is slow, so a failed init would otherwise waste
    //a lot of time tearing many threads down again.
    for (auto & poGroup : m_vecGroupList)
    {
        //start each group's thread first.
        poGroup->Start();
    }
    RunMaster(oOptions);
    RunProposeBatch();

    PLHead("OK");

    return 0;
}
//default no checkpoint
int StateMachine :: GetCheckpointState(const int iGroupIdx, std::string & sDirPath,
        std::vector<std::string> & vecFileList)
{
    PLErr("func not impl, return -1");
    return -1;
}
int StateMachine :: LoadCheckpointState(const int iGroupIdx,
        const std::string & sCheckpointTmpFileDirPath,
        const std::vector<std::string> & vecFileList,
        const uint64_t llCheckpointInstanceID)
{
    PLErr("func not impl, return -1");
    return -1;
}