void MasterDamon :: run() { m_bIsStarted = true; while(true) { if (m_bIsEnd) { return; } int iLeaseTime = m_iLeaseTime; uint64_t llBeginTime = Time::GetTimestampMS(); TryBeMaster(iLeaseTime); int iConitnueLeaseTimeout = (iLeaseTime - 100) / 3; if (m_bNeedDropMaster) { m_bNeedDropMaster = false; iConitnueLeaseTimeout = iLeaseTime * 2; PLG1Imp("Need drop master, this round wait time %dms", iConitnueLeaseTimeout); } uint64_t llEndTime = Time::GetTimestampMS(); int iRunTime = llEndTime > llBeginTime ? llEndTime - llBeginTime : 0; int iNeedSleepTime = iConitnueLeaseTimeout > iRunTime ? iConitnueLeaseTimeout - iRunTime : 0; PLG1Imp("TryBeMaster, sleep time %dms", iNeedSleepTime); Time::MsSleep(iNeedSleepTime); } }
int MasterStateMachine :: Init() { MasterVariables oVariables; int ret = m_oMVStore.Read(m_iMyGroupIdx, oVariables); if (ret != 0 && ret != 1) { PLG1Err("Master variables read from store fail, ret %d", ret); return -1; } if (ret == 1) { PLG1Imp("no master variables exist"); } else { m_llMasterVersion = oVariables.version(); if (oVariables.masternodeid() == m_iMyNodeID) { m_iMasterNodeID = nullnode; m_llAbsExpireTime = 0; } else { m_iMasterNodeID = oVariables.masternodeid(); m_llAbsExpireTime = Time::GetTimestampMS() + oVariables.leasetime(); } } PLG1Head("OK, master nodeid %lu version %lu expiretime %u", m_iMasterNodeID, m_llMasterVersion, m_llAbsExpireTime); return 0; }
int SystemVSM :: UpdateByCheckpoint(const std::string & sCPBuffer, bool & bChange) { if (sCPBuffer.size() == 0) { return 0; } bChange = false; SystemVariables oVariables; bool bSucc = oVariables.ParseFromArray(sCPBuffer.data(), sCPBuffer.size()); if (!bSucc) { PLG1Err("Variables.ParseFromArray fail, bufferlen %zu", sCPBuffer.size()); return -1; } if (oVariables.version() == (uint64_t)-1) { PLG1Err("variables.version not init, this is not checkpoint"); return -2; } if (m_oSystemVariables.gid() != 0 && oVariables.gid() != m_oSystemVariables.gid()) { PLG1Err("gid not same, cp.gid %lu now.gid %lu", oVariables.gid(), m_oSystemVariables.gid()); return -2; } if (m_oSystemVariables.version() != (uint64_t)-1 && oVariables.version() <= m_oSystemVariables.version()) { PLG1Imp("lag checkpoint, no need update, cp.version %lu now.version %lu", oVariables.version(), m_oSystemVariables.version()); return 0; } bChange = true; SystemVariables oOldVariables = m_oSystemVariables; int ret = UpdateSystemVariables(oVariables); if (ret != 0) { return -1; } PLG1Head("ok, cp.version %lu cp.membercount %d old.version %lu old.membercount %d", oVariables.version(), oVariables.membership_size(), oOldVariables.version(), oOldVariables.membership_size()); return 0; }
int MasterStateMachine :: UpdateByCheckpoint(const std::string & sCPBuffer, bool & bChange) { if (sCPBuffer.size() == 0) { return 0; } MasterVariables oVariables; bool bSucc = oVariables.ParseFromArray(sCPBuffer.data(), sCPBuffer.size()); if (!bSucc) { PLG1Err("Variables.ParseFromArray fail, bufferlen %zu", sCPBuffer.size()); return -1; } std::lock_guard<std::mutex> oLockGuard(m_oMutex); if (oVariables.version() <= m_llMasterVersion && m_llMasterVersion != (uint64_t)-1) { PLG1Imp("lag checkpoint, no need update, cp.version %lu now.version %lu", oVariables.version(), m_llMasterVersion); return 0; } int ret = UpdateMasterToStore(oVariables.masternodeid(), oVariables.version(), oVariables.leasetime()); if (ret != 0) { return -1; } PLG1Head("ok, cp.version %lu cp.masternodeid %lu old.version %lu old.masternodeid %lu", oVariables.version(), oVariables.masternodeid(), m_llMasterVersion, m_iMasterNodeID); m_llMasterVersion = oVariables.version(); if (oVariables.masternodeid() == m_iMyNodeID) { m_iMasterNodeID = nullnode; m_llAbsExpireTime = 0; } else { m_iMasterNodeID = oVariables.masternodeid(); m_llAbsExpireTime = Time::GetSteadyClockMS() + oVariables.leasetime(); } return 0; }
// Loads the persisted system variables for this group.
//
// Read result 0: variables loaded, node ids are refreshed.
// Read result 1: nothing stored; gid is set to 0 and version to -1.
// Any other result is returned to the caller unchanged.
//
// Returns 0 on success, otherwise the store's error code.
int SystemVSM :: Init()
{
    const int iReadRet = m_oSystemVStore.Read(m_iMyGroupIdx, m_oSystemVariables);

    switch (iReadRet)
    {
        case 0:
            RefleshNodeID();
            PLG1Imp("OK, gourpidx %d gid %lu version %lu",
                    m_iMyGroupIdx, m_oSystemVariables.gid(), m_oSystemVariables.version());
            return 0;

        case 1:
            // Nothing persisted yet: start from the "unset" markers.
            m_oSystemVariables.set_gid(0);
            m_oSystemVariables.set_version(-1);
            PLG1Imp("variables not exist");
            return 0;

        default:
            return iReadRet;
    }
}
int Database :: GetMinChosenInstanceID(uint64_t & llMinInstanceID) { if (!m_bHasInit) { PLG1Err("no init yet"); return -1; } static uint64_t llMinKey = MINCHOSEN_KEY; std::string sValue; int ret = GetFromLevelDB(llMinKey, sValue); if (ret != 0 && ret != 1) { PLG1Err("fail, ret %d", ret); return ret; } if (ret == 1) { PLG1Err("no min chosen instanceid"); llMinInstanceID = 0; return 0; } //old version, minchonsenid store in logstore. //new version, minchonsenid directly store in leveldb. if (m_poValueStore->IsValidFileID(sValue)) { ret = Get(llMinKey, sValue); if (ret != 0 && ret != 1) { PLG1Err("Get from log store fail, ret %d", ret); return ret; } } if (sValue.size() != sizeof(uint64_t)) { PLG1Err("fail, mininstanceid size wrong"); return -2; } memcpy(&llMinInstanceID, sValue.data(), sizeof(uint64_t)); PLG1Imp("ok, min chosen instanceid %lu", llMinInstanceID); return 0; }
int MasterStateMachine :: LearnMaster( const uint64_t llInstanceID, const MasterOperator & oMasterOper, const uint64_t llAbsMasterTimeout) { ScopedLock<Mutex> oLockGuard(m_oMutex); if (oMasterOper.version() != m_llMasterVersion) { PLG1Err("version conflit, op version %lu now master version %lu", oMasterOper.version(), m_llMasterVersion); return 0; } int ret = UpdateMasterToStore(oMasterOper.nodeid(), llInstanceID, oMasterOper.timeout()); if (ret != 0) { PLG1Err("UpdateMasterToStore fail, ret %d", ret); return -1; } m_iMasterNodeID = oMasterOper.nodeid(); if (m_iMasterNodeID == m_iMyNodeID) { //self be master //use local abstimeout m_llAbsExpireTime = llAbsMasterTimeout; PLG1Head("Be master success, absexpiretime %lu", m_llAbsExpireTime); } else { //other be master //use new start timeout m_llAbsExpireTime = Time::GetTimestampMS() + oMasterOper.timeout(); PLG1Head("Ohter be master, absexpiretime %lu", m_llAbsExpireTime); } m_iLeaseTime = oMasterOper.timeout(); m_llMasterVersion = llInstanceID; PLG1Imp("OK, masternodeid %lu version %lu abstimeout %lu", m_iMasterNodeID, m_llMasterVersion, m_llAbsExpireTime); return 0; }
int Database :: Init(const std::string & sDBPath, const int iMyGroupIdx) { if (m_bHasInit) { return 0; } m_iMyGroupIdx = iMyGroupIdx; m_sDBPath = sDBPath; leveldb::Options oOptions; oOptions.create_if_missing = true; oOptions.comparator = &m_oPaxosCmp; //every group have different buffer size to avoid all group compact at the same time. oOptions.write_buffer_size = 1024 * 1024 + iMyGroupIdx * 10 * 1024; leveldb::Status oStatus = leveldb::DB::Open(oOptions, sDBPath, &m_poLevelDB); if (!oStatus.ok()) { PLG1Err("Open leveldb fail, db_path %s", sDBPath.c_str()); return -1; } m_poValueStore = new LogStore(); assert(m_poValueStore != nullptr); int ret = m_poValueStore->Init(sDBPath, iMyGroupIdx, (Database *)this); if (ret != 0) { PLG1Err("value store init fail, ret %d", ret); return -1; } m_bHasInit = true; PLG1Imp("OK, db_path %s", sDBPath.c_str()); return 0; }
bool MasterStateMachine :: Execute(const int iGroupIdx, const uint64_t llInstanceID, const std::string & sValue, SMCtx * poSMCtx) { MasterOperator oMasterOper; bool bSucc = oMasterOper.ParseFromArray(sValue.data(), sValue.size()); if (!bSucc) { PLG1Err("oMasterOper data wrong"); //wrong oper data, just skip, so return true return true; } if (oMasterOper.operator_() == MasterOperatorType_Complete) { uint64_t * pAbsMasterTimeout = nullptr; if (poSMCtx != nullptr && poSMCtx->m_pCtx != nullptr) { pAbsMasterTimeout = (uint64_t *)poSMCtx->m_pCtx; } uint64_t llAbsMasterTimeout = pAbsMasterTimeout != nullptr ? *pAbsMasterTimeout : 0; PLG1Imp("absmaster timeout %lu", llAbsMasterTimeout); int ret = LearnMaster(llInstanceID, oMasterOper, llAbsMasterTimeout); if (ret != 0) { return false; } } else { PLG1Err("unknown op %u", oMasterOper.operator_()); //wrong op, just skip, so return true; return true; } return true; }
// Persists the minimum chosen instance id directly in leveldb as the raw
// bytes of a uint64_t.
//
// oWriteOptions:   not consulted by this function; the write always passes
//                  `true` as PutToLevelDB's first argument.
//                  NOTE(review): presumably that flag is "sync" -- confirm.
// llMinInstanceID: value to store.
//
// Returns 0 on success, -1 if the database is not initialised, or the
// error code from PutToLevelDB().
int Database :: SetMinChosenInstanceID(const WriteOptions & oWriteOptions, const uint64_t llMinInstanceID)
{
    if (!m_bHasInit)
    {
        PLG1Err("no init yet");
        return -1;
    }

    static uint64_t llMinKey = MINCHOSEN_KEY;

    // Same byte layout that GetMinChosenInstanceID() copies back out.
    const std::string sEncoded(
            reinterpret_cast<const char *>(&llMinInstanceID), sizeof(uint64_t));

    const int iPutRet = PutToLevelDB(true, llMinKey, sEncoded);
    if (iPutRet != 0)
    {
        return iPutRet;
    }

    PLG1Imp("ok, min chosen instanceid %lu", llMinInstanceID);

    return 0;
}
void MasterDamon :: TryBeMaster(const int iLeaseTime) { nodeid_t iMasterNodeID = nullnode; uint64_t llMasterVersion = 0; //step 1 check exist master and get version m_oDefaultMasterSM.SafeGetMaster(iMasterNodeID, llMasterVersion); if (iMasterNodeID != nullnode && (iMasterNodeID != m_poPaxosNode->GetMyNodeID())) { PLG1Imp("Ohter as master, can't try be master, masterid %lu myid %lu", iMasterNodeID, m_poPaxosNode->GetMyNodeID()); return; } //step 2 try be master std::string sPaxosValue; if (!MasterStateMachine::MakeOpValue( m_poPaxosNode->GetMyNodeID(), llMasterVersion, iLeaseTime, MasterOperatorType_Complete, sPaxosValue)) { PLG1Err("Make paxos value fail"); return; } const int iMasterLeaseTimeout = iLeaseTime - 100; uint64_t llAbsMasterTimeout = Time::GetTimestampMS() + iMasterLeaseTimeout; uint64_t llCommitInstanceID = 0; SMCtx oCtx; oCtx.m_iSMID = MASTER_V_SMID; oCtx.m_pCtx = (void *)&llAbsMasterTimeout; m_poPaxosNode->Propose(m_iMyGroupIdx, sPaxosValue, llCommitInstanceID, &oCtx); }
int MasterStateMachine :: LearnMaster( const uint64_t llInstanceID, const MasterOperator & oMasterOper, const uint64_t llAbsMasterTimeout) { std::lock_guard<std::mutex> oLockGuard(m_oMutex); PLG1Debug("my last version %lu other last version %lu this version %lu instanceid %lu", m_llMasterVersion, oMasterOper.lastversion(), oMasterOper.version(), llInstanceID); if (oMasterOper.lastversion() != 0 && llInstanceID > m_llMasterVersion && oMasterOper.lastversion() != m_llMasterVersion) { BP->GetMasterBP()->MasterSMInconsistent(); PLG1Err("other last version %lu not same to my last version %lu, instanceid %lu", oMasterOper.lastversion(), m_llMasterVersion, llInstanceID); PLG1Err("try to fix, set my master version %lu as other last version %lu, instanceid %lu", m_llMasterVersion, oMasterOper.lastversion(), llInstanceID); m_llMasterVersion = oMasterOper.lastversion(); } if (oMasterOper.version() != m_llMasterVersion) { PLG1Debug("version conflit, op version %lu now master version %lu", oMasterOper.version(), m_llMasterVersion); return 0; } int ret = UpdateMasterToStore(oMasterOper.nodeid(), llInstanceID, oMasterOper.timeout()); if (ret != 0) { PLG1Err("UpdateMasterToStore fail, ret %d", ret); return -1; } bool bMasterChange = false; if (m_iMasterNodeID != oMasterOper.nodeid()) { bMasterChange = true; } m_iMasterNodeID = oMasterOper.nodeid(); if (m_iMasterNodeID == m_iMyNodeID) { //self be master //use local abstimeout m_llAbsExpireTime = llAbsMasterTimeout; BP->GetMasterBP()->SuccessBeMaster(); PLG1Head("Be master success, absexpiretime %lu", m_llAbsExpireTime); } else { //other be master //use new start timeout m_llAbsExpireTime = Time::GetSteadyClockMS() + oMasterOper.timeout(); BP->GetMasterBP()->OtherBeMaster(); PLG1Head("Ohter be master, absexpiretime %lu", m_llAbsExpireTime); } m_iLeaseTime = oMasterOper.timeout(); m_llMasterVersion = llInstanceID; if (bMasterChange) { if (m_pMasterChangeCallback != nullptr) { m_pMasterChangeCallback(m_iMyGroupIdx, 
NodeInfo(m_iMasterNodeID), m_llMasterVersion); } } PLG1Imp("OK, masternodeid %lu version %lu abstimeout %lu", m_iMasterNodeID, m_llMasterVersion, m_llAbsExpireTime); return 0; }