Exemplo n.º 1
0
// Records that iSendNodeID told this node to ask for a checkpoint and decides
// whether to enter ask-for-checkpoint mode now.
//
// Returns 0 after switching into the mode (m_bInAskforCheckpointMode is set and
// the wait timestamp is cleared). Returns -2 while fewer than
// GetMajorityCount() distinct nodes have been recorded and no more than
// 60000 ms have elapsed since the first recorded request.
int CheckpointMgr :: PrepareForAskforCheckpoint(const nodeid_t iSendNodeID)
{
    // Inserting an id that is already present leaves the set unchanged
    // (assumes m_setNeedAsk has std::set-like semantics).
    m_setNeedAsk.insert(iSendNodeID);

    // A zero timestamp means no wait is in progress: start the clock now.
    if (m_llLastAskforCheckpointTime == 0)
    {
        m_llLastAskforCheckpointTime = Time::GetTimestampMS();
    }

    const uint64_t llNowTime = Time::GetTimestampMS();
    const bool bWaitedTooLong = llNowTime > m_llLastAskforCheckpointTime + 60000;

    // Within the wait window a majority of nodes must have asked first.
    if (!bWaitedTooLong
            && static_cast<int>(m_setNeedAsk.size()) < m_poConfig->GetMajorityCount())
    {
        PLGImp("Need more other tell us need to askforcheckpoint");
        return -2;
    }

    if (bWaitedTooLong)
    {
        PLGImp("no majority reply, just ask for checkpoint");
    }

    m_bInAskforCheckpointMode = true;
    m_llLastAskforCheckpointTime = 0;

    return 0;
}
Exemplo n.º 2
0
// Handles one reply to the prepare request of the current proposal.
//
// Replies are dropped when the proposer is not preparing or when the reply's
// proposal id differs from the current one. Otherwise the reply is counted as
// a promise (rejectbypromiseid == 0) or a reject, and the round outcome is
// evaluated: on pass the proposer moves on to Accept(); on reject, or when all
// replies have arrived without a pass, a randomized prepare timer is armed.
void Proposer :: OnPrepareReply(const PaxosMsg & oPaxosMsg)
{
    PLGHead("START Msg.ProposalID %lu State.ProposalID %lu Msg.from_nodeid %lu RejectByPromiseID %lu",
            oPaxosMsg.proposalid(), m_oProposerState.GetProposalID(), 
            oPaxosMsg.nodeid(), oPaxosMsg.rejectbypromiseid());

    BP->GetProposerBP()->OnPrepareReply();

    // Not in the prepare phase: only record the breakpoint and ignore.
    if (!m_bIsPreparing)
    {
        BP->GetProposerBP()->OnPrepareReplyButNotPreparing();
        return;
    }

    // Reply belongs to a different proposal id: ignore.
    if (oPaxosMsg.proposalid() != m_oProposerState.GetProposalID())
    {
        BP->GetProposerBP()->OnPrepareReplyNotSameProposalIDMsg();
        return;
    }

    m_oMsgCounter.AddReceive(oPaxosMsg.nodeid());

    const bool bRejected = (oPaxosMsg.rejectbypromiseid() != 0);
    if (bRejected)
    {
        PLGDebug("[Reject] RejectByPromiseID %lu", oPaxosMsg.rejectbypromiseid());
        m_oMsgCounter.AddReject(oPaxosMsg.nodeid());
        m_bWasRejectBySomeone = true;
        // Remember the proposal id that caused the reject.
        m_oProposerState.SetOtherProposalID(oPaxosMsg.rejectbypromiseid());
    }
    else
    {
        BallotNumber oBallot(oPaxosMsg.preacceptid(), oPaxosMsg.preacceptnodeid());
        PLGDebug("[Promise] PreAcceptedID %lu PreAcceptedNodeID %lu ValueSize %zu", 
                oPaxosMsg.preacceptid(), oPaxosMsg.preacceptnodeid(), oPaxosMsg.value().size());
        m_oMsgCounter.AddPromiseOrAccept(oPaxosMsg.nodeid());
        // Hand the value carried by the promise to the proposer state.
        m_oProposerState.AddPreAcceptValue(oBallot, oPaxosMsg.value());
    }

    if (m_oMsgCounter.IsPassedOnThisRound())
    {
        int iElapsedMs = m_oTimeStat.Point();
        BP->GetProposerBP()->PreparePass(iElapsedMs);
        PLGImp("[Pass] start accept, usetime %dms", iElapsedMs);
        m_bCanSkipPrepare = true;
        Accept();
    }
    else if (m_oMsgCounter.IsRejectedOnThisRound()
            || m_oMsgCounter.IsAllReceiveOnThisRound())
    {
        BP->GetProposerBP()->PrepareNotPass();
        PLGImp("[Not Pass] wait 30ms and restart prepare");
        // Timer value is randomized in [10, 39].
        AddPrepareTimer(OtherUtils::FastRand() % 30 + 10);
    }

    PLGHead("END");
}
Exemplo n.º 3
0
// Handles one reply to the accept request of the current proposal.
//
// Replies are dropped when the proposer is not accepting or when the reply's
// proposal id differs from the current one. Otherwise the reply is counted as
// an accept (rejectbypromiseid == 0) or a reject. On pass the accept phase is
// exited and the learner is told the proposal succeeded; on reject, or when
// all replies have arrived without a pass, a randomized accept timer is armed.
void Proposer :: OnAcceptReply(const PaxosMsg & oPaxosMsg)
{
    PLGHead("START Msg.ProposalID %lu State.ProposalID %lu Msg.from_nodeid %lu RejectByPromiseID %lu",
            oPaxosMsg.proposalid(), m_oProposerState.GetProposalID(), 
            oPaxosMsg.nodeid(), oPaxosMsg.rejectbypromiseid());

    BP->GetProposerBP()->OnAcceptReply();

    // Not in the accept phase: only record the breakpoint and ignore.
    if (!m_bIsAccepting)
    {
        BP->GetProposerBP()->OnAcceptReplyButNotAccepting();
        return;
    }

    // Reply belongs to a different proposal id: ignore.
    if (oPaxosMsg.proposalid() != m_oProposerState.GetProposalID())
    {
        BP->GetProposerBP()->OnAcceptReplyNotSameProposalIDMsg();
        return;
    }

    m_oMsgCounter.AddReceive(oPaxosMsg.nodeid());

    const bool bRejected = (oPaxosMsg.rejectbypromiseid() != 0);
    if (bRejected)
    {
        PLGDebug("[Reject]");
        m_oMsgCounter.AddReject(oPaxosMsg.nodeid());
        m_bWasRejectBySomeone = true;
        // Remember the proposal id that caused the reject.
        m_oProposerState.SetOtherProposalID(oPaxosMsg.rejectbypromiseid());
    }
    else
    {
        PLGDebug("[Accept]");
        m_oMsgCounter.AddPromiseOrAccept(oPaxosMsg.nodeid());
    }

    if (m_oMsgCounter.IsPassedOnThisRound())
    {
        int iElapsedMs = m_oTimeStat.Point();
        BP->GetProposerBP()->AcceptPass(iElapsedMs);
        PLGImp("[Pass] Start send learn, usetime %dms", iElapsedMs);
        ExitAccept();
        m_poLearner->ProposerSendSuccess(GetInstanceID(), m_oProposerState.GetProposalID());
    }
    else if (m_oMsgCounter.IsRejectedOnThisRound()
            || m_oMsgCounter.IsAllReceiveOnThisRound())
    {
        BP->GetProposerBP()->AcceptNotPass();
        PLGImp("[Not pass] wait 30ms and Restart prepare");
        // Timer value is randomized in [10, 39].
        AddAcceptTimer(OtherUtils::FastRand() % 30 + 10);
    }

    PLGHead("END");
}
Exemplo n.º 4
0
int Learner :: OnSendCheckpoint_End(const CheckpointMsg & oCheckpointMsg)
{
    if (!m_oCheckpointReceiver.IsReceiverFinish(oCheckpointMsg.nodeid(), 
                oCheckpointMsg.uuid(), oCheckpointMsg.sequence()))
    {
        PLGErr("receive end msg but receiver not finish");
        return -1;
    }
    
    BP->GetCheckpointBP()->ReceiveCheckpointDone();

    std::vector<StateMachine *> vecSMList = m_poSMFac->GetSMList();
    for (auto & poSM : vecSMList)
    {
        if (poSM->SMID() == SYSTEM_V_SMID
                || poSM->SMID() == MASTER_V_SMID)
        {
            //system variables sm no checkpoint
            //master variables sm no checkpoint
            continue;
        }

        string sTmpDirPath = m_oCheckpointReceiver.GetTmpDirPath(poSM->SMID());
        std::vector<std::string> vecFilePathList;

        int ret = FileUtils :: IterDir(sTmpDirPath, vecFilePathList);
        if (ret != 0)
        {
            PLGErr("IterDir fail, dirpath %s", sTmpDirPath.c_str());
        }

        if (vecFilePathList.size() == 0)
        {
            PLGImp("this sm %d have no checkpoint", poSM->SMID());
            continue;
        }
        
        ret = poSM->LoadCheckpointState(
                m_poConfig->GetMyGroupIdx(),
                sTmpDirPath,
                vecFilePathList,
                oCheckpointMsg.checkpointinstanceid());
        if (ret != 0)
        {
            BP->GetCheckpointBP()->ReceiveCheckpointAndLoadFail();
            return ret;
        }

    }

    BP->GetCheckpointBP()->ReceiveCheckpointAndLoadSucc();
    PLGImp("All sm load state ok, start to exit process");
    exit(-1);

    return 0;
}
Exemplo n.º 5
0
// Cleaner loop: repeatedly deletes instances that lie more than m_llHoldCount
// behind the checkpoint instance id, starting at the checkpoint manager's
// min chosen instance id. Runs until m_bIsEnd is set; while m_bCanrun is
// false it marks itself paused and polls once per second.
void Cleaner :: run()
{
    m_bIsStart = true;
    Continue();

    uint64_t llInstanceID = m_poCheckpointMgr->GetMinChosenInstanceID();

    while (!m_bIsEnd)
    {
        if (!m_bCanrun)
        {
            PLGImp("Pausing, sleep");
            m_bIsPaused = true;
            Time::MsSleep(1000);
            continue;
        }

        // One past the checkpoint instance id reported by the state machines.
        const uint64_t llCPInstanceID = m_poSMFac->GetCheckpointInstanceID(m_poConfig->GetMyGroupIdx()) + 1;

        // Delete forward until only m_llHoldCount instances remain below the
        // checkpoint; stop this pass at the first failed delete.
        for (; llInstanceID + m_llHoldCount < llCPInstanceID; llInstanceID++)
        {
            if (!DeleteOne(llInstanceID))
            {
                PLGErr("delete system fail, instanceid %lu", llInstanceID);
                break;
            }

            PLGImp("delete one done, instanceid %lu", llInstanceID);
        }

        // llCPInstanceID == 0 is logged as the "no checkpoint" case.
        if (llCPInstanceID != 0)
        {
            PLGImp("sleep a while, max deleted instanceid %lu checkpoint instanceid %lu now instanceid %lu",
                   llInstanceID, llCPInstanceID, m_poCheckpointMgr->GetMaxChosenInstanceID());
        }
        else
        {
            PLGImp("sleep a while, max deleted instanceid %lu checkpoint instanceid (no checkpoint) now instanceid %lu",
                   llInstanceID, m_poCheckpointMgr->GetMaxChosenInstanceID());
        }

        Time::MsSleep(1000);
    }

    PLGHead("Checkpoint.Cleaner [END]");
}
Exemplo n.º 6
0
int Learner :: SendCheckpoint(
        const nodeid_t iSendNodeID,
        const uint64_t llUUID,
        const uint64_t llSequence,
        const uint64_t llCheckpointInstanceID,
        const uint32_t iChecksum,
        const std::string & sFilePath,
        const int iSMID,
        const uint64_t llOffset,
        const std::string & sBuffer)
{
    CheckpointMsg oCheckpointMsg;

    oCheckpointMsg.set_msgtype(CheckpointMsgType_SendFile);
    oCheckpointMsg.set_nodeid(m_poConfig->GetMyNodeID());
    oCheckpointMsg.set_flag(CheckpointSendFileFlag_ING);
    oCheckpointMsg.set_uuid(llUUID);
    oCheckpointMsg.set_sequence(llSequence);
    oCheckpointMsg.set_checkpointinstanceid(llCheckpointInstanceID);
    oCheckpointMsg.set_checksum(iChecksum);
    oCheckpointMsg.set_filepath(sFilePath);
    oCheckpointMsg.set_smid(iSMID);
    oCheckpointMsg.set_offset(llOffset);
    oCheckpointMsg.set_buffer(sBuffer);

    PLGImp("END, SendNodeID %lu uuid %lu sequence %lu cpi %lu checksum %u smid %d offset %lu buffsize %zu filepath %s",
            iSendNodeID, llUUID, llSequence, llCheckpointInstanceID, 
            iChecksum, iSMID, llOffset, sBuffer.size(), sFilePath.c_str());

    return SendMessage(iSendNodeID, oCheckpointMsg, Message_SendType_TCP);
}
Exemplo n.º 7
0
// Normalizes sFilePath into sFormatFilePath and creates its parent directories.
//
// The path is split on '/', empty components are dropped, and the result is
// rebuilt as "/" + components joined by "/". Every prefix directory (all
// components but the last) not yet present in m_mapHasInitDir is created with
// CreateDir and then recorded there.
//
// Returns 0 on success, or the first non-zero CreateDir result.
//
// NOTE(review): the rebuilt path always starts with "/", so a relative
// sFilePath comes out rooted at "/". Confirm callers only pass absolute paths.
int CheckpointReceiver :: InitFilePath(const std::string & sFilePath, std::string & sFormatFilePath)
{
    PLGHead("START filepath %s", sFilePath.c_str());

    // Collect the non-empty path components.
    std::vector<std::string> vecParts;
    std::string sCurrent;
    for (const char cCh : sFilePath)
    {
        if (cCh != '/')
        {
            sCurrent += cCh;
            continue;
        }

        if (!sCurrent.empty())
        {
            vecParts.push_back(sCurrent);
        }
        sCurrent.clear();
    }
    if (!sCurrent.empty())
    {
        vecParts.push_back(sCurrent);
    }

    sFormatFilePath = "/";
    for (size_t iIdx = 0; iIdx < vecParts.size(); iIdx++)
    {
        sFormatFilePath += vecParts[iIdx];

        // The last component is the file name: no directory to create.
        if (iIdx + 1 == vecParts.size())
        {
            continue;
        }

        sFormatFilePath += "/";
        if (m_mapHasInitDir.find(sFormatFilePath) != m_mapHasInitDir.end())
        {
            continue;
        }

        const int iCreateRet = CreateDir(sFormatFilePath);
        if (iCreateRet != 0)
        {
            return iCreateRet;
        }

        m_mapHasInitDir[sFormatFilePath] = true;
    }

    PLGImp("ok, format filepath %s", sFormatFilePath.c_str());

    return 0;
}
Exemplo n.º 8
0
void Learner :: OnAskforLearn(const PaxosMsg & oPaxosMsg)
{
    BP->GetLearnerBP()->OnAskforLearn();
    
    PLGHead("START Msg.InstanceID %lu Now.InstanceID %lu Msg.from_nodeid %lu MinChosenInstanceID %lu", 
            oPaxosMsg.instanceid(), GetInstanceID(), oPaxosMsg.nodeid(),
            m_poCheckpointMgr->GetMinChosenInstanceID());
    
    SetSeenInstanceID(oPaxosMsg.instanceid(), oPaxosMsg.nodeid());

    if (oPaxosMsg.proposalnodeid() == m_poConfig->GetMyNodeID())
    {
        //Found a node follow me.
        PLImp("Found a node %lu follow me.", oPaxosMsg.nodeid());
        m_poConfig->AddFollowerNode(oPaxosMsg.nodeid());
    }
    
    if (oPaxosMsg.instanceid() >= GetInstanceID())
    {
        return;
    }

    if (oPaxosMsg.instanceid() >= m_poCheckpointMgr->GetMinChosenInstanceID())
    {
        if (!m_oLearnerSender.Prepare(oPaxosMsg.instanceid(), oPaxosMsg.nodeid()))
        {
            BP->GetLearnerBP()->OnAskforLearnGetLockFail();

            PLGErr("LearnerSender working for others.");

            if (oPaxosMsg.instanceid() == (GetInstanceID() - 1))
            {
                PLGImp("InstanceID only difference one, just send this value to other.");
                //send one value
                AcceptorStateData oState;
                int ret = m_oPaxosLog.ReadState(m_poConfig->GetMyGroupIdx(), oPaxosMsg.instanceid(), oState);
                if (ret == 0)
                {
                    BallotNumber oBallot(oState.acceptedid(), oState.acceptednodeid());
                    SendLearnValue(oPaxosMsg.nodeid(), oPaxosMsg.instanceid(), oBallot, oState.acceptedvalue(), 0);
                }
            }
            
            return;
        }
    }
    
    SendNowInstanceID(oPaxosMsg.instanceid(), oPaxosMsg.nodeid());
}
Exemplo n.º 9
0
// Handles the sender's confirmation of an ask-for-learn exchange by passing
// the message's instance id and node id to the learner sender's Comfirm().
// A failed confirm is recorded as a breakpoint and logged; nothing else is done.
void Learner :: OnComfirmAskForLearn(const PaxosMsg & oPaxosMsg)
{
    BP->GetLearnerBP()->OnComfirmAskForLearn();

    PLGHead("START Msg.InstanceID %lu Msg.from_nodeid %lu", oPaxosMsg.instanceid(), oPaxosMsg.nodeid());

    const bool bConfirmed = m_oLearnerSender.Comfirm(oPaxosMsg.instanceid(), oPaxosMsg.nodeid());
    if (bConfirmed)
    {
        PLGImp("OK, success comfirm");
        return;
    }

    BP->GetLearnerBP()->OnComfirmAskForLearnGetLockFail();

    PLGErr("LearnerSender comfirm fail, maybe is lag msg");
}
Exemplo n.º 10
0
// Handles the first message of a checkpoint transfer: starts a new receiver
// for (nodeid, uuid) and, if that succeeds, sets the min chosen instance id to
// the message's checkpoint instance id.
//
// Returns 0 on success, otherwise the non-zero result of whichever step failed.
int Learner :: OnSendCheckpoint_Begin(const CheckpointMsg & oCheckpointMsg)
{
    const int iNewRet = m_oCheckpointReceiver.NewReceiver(oCheckpointMsg.nodeid(), oCheckpointMsg.uuid());
    if (iNewRet != 0)
    {
        return iNewRet;
    }

    PLGImp("NewReceiver ok");

    const int iSetRet = m_poCheckpointMgr->SetMinChosenInstanceID(oCheckpointMsg.checkpointinstanceid());
    if (iSetRet != 0)
    {
        PLGErr("SetMinChosenInstanceID fail, ret %d CheckpointInstanceID %lu",
                iSetRet, oCheckpointMsg.checkpointinstanceid());
    }

    return iSetRet;
}
Exemplo n.º 11
0
// Advances the min chosen instance id past leading instances for which the
// log storage Get() returns 1.
//
// At most DELETE_SAVE_INTERVAL instances are scanned, starting at
// llOldMinChosenInstanceID and never reaching checkpoint instance id + 1.
// The scan stops at the first instance whose Get() returns 0.
//
// Returns 0 on success, -1 if Get() returns anything other than 0 or 1, or
// the non-zero result of SetMinChosenInstanceID.
//
// NOTE(review): Get() == 1 presumably means "not found" — confirm against the
// log storage interface.
int Cleaner :: FixMinChosenInstanceID(const uint64_t llOldMinChosenInstanceID)
{
    const uint64_t llCPInstanceID = m_poSMFac->GetCheckpointInstanceID(m_poConfig->GetMyGroupIdx()) + 1;
    uint64_t llFixMinChosenInstanceID = llOldMinChosenInstanceID;

    for (uint64_t llScanID = llOldMinChosenInstanceID;
            llScanID < llOldMinChosenInstanceID + DELETE_SAVE_INTERVAL && llScanID < llCPInstanceID;
            llScanID++)
    {
        std::string sValue;
        const int iGetRet = m_poLogStorage->Get(m_poConfig->GetMyGroupIdx(), llScanID, sValue);
        if (iGetRet == 0)
        {
            // First instance that Get() reports with 0: stop scanning here.
            break;
        }

        if (iGetRet != 1)
        {
            return -1;
        }

        llFixMinChosenInstanceID = llScanID + 1;
    }

    // Only write back when the scan actually moved the boundary.
    if (llFixMinChosenInstanceID > llOldMinChosenInstanceID)
    {
        const int iSetRet = m_poCheckpointMgr->SetMinChosenInstanceID(llFixMinChosenInstanceID);
        if (iSetRet != 0)
        {
            return iSetRet;
        }
    }

    PLGImp("ok, old minchosen %lu fix minchosen %lu", llOldMinChosenInstanceID, llFixMinChosenInstanceID);

    return 0;
}
Exemplo n.º 12
0
// Replayer loop: starting one past the checkpoint instance id, calls PlayOne
// on successive instances while they are below the max chosen instance id.
// Runs until m_bIsEnd is set; while m_bCanrun is false it marks itself paused
// and polls once per second. A failed PlayOne is retried after 500 ms.
void Replayer :: run()
{
    PLGHead("Checkpoint.Replayer [START]");
    uint64_t llInstanceID = m_poSMFac->GetCheckpointInstanceID(m_poConfig->GetMyGroupIdx()) + 1;

    while (!m_bIsEnd)
    {
        if (!m_bCanrun)
        {
            m_bIsPaused = true;
            Time::MsSleep(1000);
            continue;
        }

        // Nothing to replay yet: wait for the max chosen instance id to advance.
        if (llInstanceID >= m_poCheckpointMgr->GetMaxChosenInstanceID())
        {
            Time::MsSleep(1000);
            continue;
        }

        if (PlayOne(llInstanceID))
        {
            PLGImp("Play one done, instanceid %lu", llInstanceID);
            llInstanceID++;
            continue;
        }

        // Same instance is retried on the next iteration.
        PLGErr("Play one fail, instanceid %lu", llInstanceID);
        Time::MsSleep(500);
    }

    PLGHead("Checkpoint.Replayer [END]");
}
Exemplo n.º 13
0
int Learner :: SendCheckpointEnd(
        const nodeid_t iSendNodeID,
        const uint64_t llUUID,
        const uint64_t llSequence,
        const uint64_t llCheckpointInstanceID)
{
    CheckpointMsg oCheckpointMsg;

    oCheckpointMsg.set_msgtype(CheckpointMsgType_SendFile);
    oCheckpointMsg.set_nodeid(m_poConfig->GetMyNodeID());
    oCheckpointMsg.set_flag(CheckpointSendFileFlag_END);
    oCheckpointMsg.set_uuid(llUUID);
    oCheckpointMsg.set_sequence(llSequence);
    oCheckpointMsg.set_checkpointinstanceid(llCheckpointInstanceID);

    PLGImp("END, SendNodeID %lu uuid %lu sequence %lu cpi %lu",
            iSendNodeID, llUUID, llSequence, llCheckpointInstanceID);

    return SendMessage(iSendNodeID, oCheckpointMsg, Message_SendType_TCP);
}
Exemplo n.º 14
0
void Learner :: SendLearnValue_Ack(const nodeid_t iSendNodeID)
{
    PLGHead("START LastAck.Instanceid %lu Now.Instanceid %lu", m_llLastAckInstanceID, GetInstanceID());

    if (GetInstanceID() < m_llLastAckInstanceID + SENDLEARNVALUE_ACK_LEAD)
    {
        PLGImp("No need to ack");
        return;
    }
    
    BP->GetLearnerBP()->SendLearnValue_Ack();

    m_llLastAckInstanceID = GetInstanceID();

    PaxosMsg oPaxosMsg;
    oPaxosMsg.set_instanceid(GetInstanceID());
    oPaxosMsg.set_msgtype(MsgType_PaxosLearner_SendLearnValue_Ack);
    oPaxosMsg.set_nodeid(m_poConfig->GetMyNodeID());

    SendMessage(iSendNodeID, oPaxosMsg);

    PLGHead("End. ok");
}
Exemplo n.º 15
0
// Appends one received checkpoint chunk to its temporary file.
//
// The message must come from the expected sender and uuid, and its sequence
// must be exactly m_llSequence + 1. A message whose sequence equals
// m_llSequence is treated as a duplicate and acknowledged with 0 without
// writing. The chunk is appended only if the file's current size equals the
// offset stated in the message.
//
// Returns:
//    0  chunk written (m_llSequence advanced), or duplicate skipped
//   -1  local failure (directory creation, open, or write)
//   -2  message rejected (wrong sender/uuid, wrong sequence, offset mismatch)
int CheckpointReceiver :: ReceiveCheckpoint(const CheckpointMsg & oCheckpointMsg)
{
    if (oCheckpointMsg.nodeid() != m_iSenderNodeID
            || oCheckpointMsg.uuid() != m_llUUID)
    {
        PLGErr("msg not valid, Msg.SenderNodeID %lu Receiver.SenderNodeID %lu Msg.UUID %lu Receiver.UUID %lu",
                oCheckpointMsg.nodeid(), m_iSenderNodeID, oCheckpointMsg.uuid(), m_llUUID);
        return -2;
    }

    if (oCheckpointMsg.sequence() == m_llSequence)
    {
        PLGErr("msg already receive, skip, Msg.Sequence %lu Receiver.Sequence %lu",
                oCheckpointMsg.sequence(), m_llSequence);
        return 0;
    }

    if (oCheckpointMsg.sequence() != m_llSequence + 1)
    {
        PLGErr("msg sequence wrong, Msg.Sequence %lu Receiver.Sequence %lu",
                oCheckpointMsg.sequence(), m_llSequence);
        return -2;
    }

    string sFilePath = GetTmpDirPath(oCheckpointMsg.smid()) + "/" + oCheckpointMsg.filepath();
    string sFormatFilePath;
    int ret = InitFilePath(sFilePath, sFormatFilePath);
    if (ret != 0)
    {
        return -1;
    }

    int iFd = open(sFormatFilePath.c_str(), O_CREAT | O_RDWR | O_APPEND, S_IWRITE | S_IREAD);
    if (iFd == -1)
    {
        PLGErr("open file fail, filepath %s", sFormatFilePath.c_str());
        return -1;
    }

    // The current end of file must match the offset the sender claims. A
    // failed lseek (-1) converts to a huge unsigned value and is rejected by
    // the same comparison.
    size_t llFileOffset = lseek(iFd, 0, SEEK_END);
    if ((uint64_t)llFileOffset != oCheckpointMsg.offset())
    {
        PLGErr("file.offset %zu not equal to msg.offset %lu", llFileOffset, oCheckpointMsg.offset());
        close(iFd);
        return -2;
    }

    // write() may legally transfer fewer bytes than requested, so keep writing
    // the remainder until the whole buffer is on disk or write() reports an
    // error / makes no progress.
    const auto & sBuffer = oCheckpointMsg.buffer();
    size_t iWriteLen = 0;
    while (iWriteLen < sBuffer.size())
    {
        const ssize_t iOnceLen = write(iFd, sBuffer.data() + iWriteLen, sBuffer.size() - iWriteLen);
        if (iOnceLen <= 0)
        {
            break;
        }

        iWriteLen += (size_t)iOnceLen;
    }

    if (iWriteLen != sBuffer.size())
    {
        PLGImp("write fail, writelen %zu buffer size %zu", iWriteLen, sBuffer.size());
        close(iFd);
        return -1;
    }

    m_llSequence++;
    close(iFd);

    PLGImp("END ok, writelen %zu", iWriteLen);

    return 0;
}