Beispiel #1
0
// Handles the END message of a checkpoint transfer.
//
// Returns -1 when the checkpoint receiver does not report the transfer as
// finished for this (nodeid, uuid, sequence), or the non-zero code of the
// first state machine whose LoadCheckpointState() fails.
// When every state machine loads successfully this function does not return:
// it terminates the process via exit(-1).
int Learner :: OnSendCheckpoint_End(const CheckpointMsg & oCheckpointMsg)
{
    const bool bReceiverDone = m_oCheckpointReceiver.IsReceiverFinish(
            oCheckpointMsg.nodeid(), oCheckpointMsg.uuid(), oCheckpointMsg.sequence());
    if (!bReceiverDone)
    {
        PLGErr("receive end msg but receiver not finish");
        return -1;
    }

    BP->GetCheckpointBP()->ReceiveCheckpointDone();

    std::vector<StateMachine *> vecAllSM = m_poSMFac->GetSMList();
    for (auto & poStateMachine : vecAllSM)
    {
        const auto iSMID = poStateMachine->SMID();

        // The system-variables and master-variables state machines carry no
        // checkpoint files, so they are skipped.
        const bool bIsVariablesSM = (iSMID == SYSTEM_V_SMID) || (iSMID == MASTER_V_SMID);
        if (bIsVariablesSM)
        {
            continue;
        }

        std::string sCheckpointDir = m_oCheckpointReceiver.GetTmpDirPath(iSMID);
        std::vector<std::string> vecCheckpointFiles;

        // A failed directory listing is only logged; the empty-list check
        // below then decides whether this state machine is skipped.
        const int iIterRet = FileUtils :: IterDir(sCheckpointDir, vecCheckpointFiles);
        if (iIterRet != 0)
        {
            PLGErr("IterDir fail, dirpath %s", sCheckpointDir.c_str());
        }

        if (vecCheckpointFiles.empty())
        {
            PLGImp("this sm %d have no checkpoint", iSMID);
            continue;
        }

        const int iLoadRet = poStateMachine->LoadCheckpointState(
                m_poConfig->GetMyGroupIdx(),
                sCheckpointDir,
                vecCheckpointFiles,
                oCheckpointMsg.checkpointinstanceid());
        if (iLoadRet != 0)
        {
            BP->GetCheckpointBP()->ReceiveCheckpointAndLoadFail();
            return iLoadRet;
        }
    }

    BP->GetCheckpointBP()->ReceiveCheckpointAndLoadSucc();
    PLGImp("All sm load state ok, start to exit process");

    // The process exits here on purpose once all state has been loaded.
    exit(-1);

    return 0;
}
Beispiel #2
0
// Handles a SendNowInstanceID reply from another node.
//
// Records the sender's current instance id, applies the system/master
// variables carried in the message, discards lagging messages, and then
// either asks the sender for a checkpoint (when its minimum chosen instance
// id is ahead of this node's instance id) or confirms the learn request.
void Learner :: OnSendNowInstanceID(const PaxosMsg & oPaxosMsg)
{
    BP->GetLearnerBP()->OnSendNowInstanceID();

    PLGHead("START Msg.InstanceID %lu Now.InstanceID %lu Msg.from_nodeid %lu Msg.MaxInstanceID %lu systemvariables_size %zu mastervariables_size %zu",
            oPaxosMsg.instanceid(), GetInstanceID(), oPaxosMsg.nodeid(), oPaxosMsg.nowinstanceid(), 
            oPaxosMsg.systemvariables().size(), oPaxosMsg.mastervariables().size());

    SetSeenInstanceID(oPaxosMsg.nowinstanceid(), oPaxosMsg.nodeid());

    // A successful update that actually changed the system variables ends
    // processing of this message.
    bool bSystemVarsChanged = false;
    const int iSystemRet = m_poConfig->GetSystemVSM()->UpdateByCheckpoint(
            oPaxosMsg.systemvariables(), bSystemVarsChanged);
    if (iSystemRet == 0 && bSystemVarsChanged)
    {
        PLGHead("SystemVariables changed!, all thing need to reflesh, so skip this msg");
        return;
    }

    // The master state machine is optional; a change is only logged.
    auto * poMasterSM = m_poConfig->GetMasterSM();
    if (poMasterSM != nullptr)
    {
        bool bMasterVarsChanged = false;
        const int iMasterRet = poMasterSM->UpdateByCheckpoint(
                oPaxosMsg.mastervariables(), bMasterVarsChanged);
        if (iMasterRet == 0 && bMasterVarsChanged)
        {
            PLGHead("MasterVariables changed!");
        }
    }

    // Drop the message when it was issued for a different instance than the
    // current one, or when the sender is not ahead of this node.
    if (oPaxosMsg.instanceid() != GetInstanceID()
            || oPaxosMsg.nowinstanceid() <= GetInstanceID())
    {
        PLGErr("Lag msg, skip");
        return;
    }

    const bool bNeedCheckpoint = oPaxosMsg.minchoseninstanceid() > GetInstanceID();
    if (bNeedCheckpoint)
    {
        BP->GetCheckpointBP()->NeedAskforCheckpoint();

        PLGHead("my instanceid %lu small than other's minchoseninstanceid %lu, other nodeid %lu",
                GetInstanceID(), oPaxosMsg.minchoseninstanceid(), oPaxosMsg.nodeid());

        AskforCheckpoint(oPaxosMsg.nodeid());
        return;
    }

    if (!m_bIsIMLearning)
    {
        ComfirmAskForLearn(oPaxosMsg.nodeid());
    }
}
// Enqueues a copy of the given message bytes onto the message queue.
//
// pcMessage / iMessageLen: start and length of the bytes to copy.
// Returns 0 when the message was queued, or -2 when it was rejected because
// the queue holds more than QUEUE_MAXLENGTH entries or the tracked queue
// memory size exceeds MAX_QUEUE_MEM_SIZE.
// The whole operation runs between m_oMessageQueue.lock() and unlock().
int IOLoop :: AddMessage(const char * pcMessage, const int iMessageLen)
{
    int iResult = 0;

    m_oMessageQueue.lock();

    BP->GetIOLoopBP()->EnqueueMsg();

    if ((int)m_oMessageQueue.size() > QUEUE_MAXLENGTH)
    {
        BP->GetIOLoopBP()->EnqueueMsgRejectByFullQueue();

        PLGErr("Queue full, skip msg");
        iResult = -2;
    }
    else if (m_iQueueMemSize > MAX_QUEUE_MEM_SIZE)
    {
        PLErr("queue memsize %d too large, can't enqueue", m_iQueueMemSize);
        iResult = -2;
    }
    else
    {
        // The queue receives a heap-allocated copy of the bytes.
        // NOTE(review): presumably the consumer deletes it - not visible here.
        m_oMessageQueue.add(new string(pcMessage, iMessageLen));
        m_iQueueMemSize += iMessageLen;
    }

    m_oMessageQueue.unlock();

    return iResult;
}
Beispiel #4
0
void Base :: PackBaseMsg(const std::string & sBodyBuffer, const int iCmd, std::string & sBuffer)
{
    char sGroupIdx[GROUPIDXLEN] = {0};
    int iGroupIdx = m_poConfig->GetMyGroupIdx();
    memcpy(sGroupIdx, &iGroupIdx, sizeof(sGroupIdx));

    Header oHeader;
    oHeader.set_gid(m_poConfig->GetGid());
    oHeader.set_rid(0);
    oHeader.set_cmdid(iCmd);
    oHeader.set_version(1);

    std::string sHeaderBuffer;
    bool bSucc = oHeader.SerializeToString(&sHeaderBuffer);
    if (!bSucc)
    {
        PLGErr("Header.SerializeToString fail, skip this msg");
        assert(bSucc == true);
    }

    char sHeaderLen[HEADLEN_LEN] = {0};
    uint16_t iHeaderLen = (uint16_t)sHeaderBuffer.size();
    memcpy(sHeaderLen, &iHeaderLen, sizeof(sHeaderLen));

    sBuffer = string(sGroupIdx, sizeof(sGroupIdx)) + string(sHeaderLen, sizeof(sHeaderLen)) + sHeaderBuffer + sBodyBuffer;

    //check sum
    uint32_t iBufferChecksum = crc32(0, (const uint8_t *)sBuffer.data(), sBuffer.size(), NET_CRC32SKIP);
    char sBufferChecksum[CHECKSUM_LEN] = {0};
    memcpy(sBufferChecksum, &iBufferChecksum, sizeof(sBufferChecksum));

    sBuffer += string(sBufferChecksum, sizeof(sBufferChecksum));
}
Beispiel #5
0
// Deletes the log entry of one instance and advances the minimum chosen
// instance id bookkeeping.
//
// The in-memory cache is updated on every successful delete; the value
// (llInstanceID + 1) is handed to SetMinChosenInstanceID() only once the
// instance id has advanced at least DELETE_SAVE_INTERVAL past the last
// saved one.
//
// Returns true on success, false if the delete or the save failed.
bool Cleaner :: DeleteOne(const uint64_t llInstanceID)
{
    WriteOptions oWriteOptions;
    oWriteOptions.bSync = false;

    const int iDelRet = m_poLogStorage->Del(oWriteOptions, m_poConfig->GetMyGroupIdx(), llInstanceID);
    if (iDelRet != 0)
    {
        return false;
    }

    m_poCheckpointMgr->SetMinChosenInstanceIDCache(llInstanceID);

    const bool bSaveDue = (llInstanceID >= m_llLastSave + DELETE_SAVE_INTERVAL);
    if (!bSaveDue)
    {
        return true;
    }

    const int iSaveRet = m_poCheckpointMgr->SetMinChosenInstanceID(llInstanceID + 1);
    if (iSaveRet != 0)
    {
        PLGErr("SetMinChosenInstanceID fail, now delete instanceid %lu", llInstanceID);
        return false;
    }

    m_llLastSave = llInstanceID;
    return true;
}
Beispiel #6
0
// Main loop of the cleaner.
//
// Starting from the minimum chosen instance id, it repeatedly deletes
// instances while (instance id + m_llHoldCount) stays below the checkpoint
// instance id + 1, then sleeps one second. The loop exits when m_bIsEnd is
// set and idles (marking itself paused) while m_bCanrun is false.
void Cleaner :: run()
{
    m_bIsStart = true;
    Continue();

    uint64_t llNextToDelete = m_poCheckpointMgr->GetMinChosenInstanceID();

    for (;;)
    {
        if (m_bIsEnd)
        {
            PLGHead("Checkpoint.Cleaner [END]");
            return;
        }

        if (!m_bCanrun)
        {
            PLGImp("Pausing, sleep");
            m_bIsPaused = true;
            Time::MsSleep(1000);
            continue;
        }

        const uint64_t llCPInstanceID = m_poSMFac->GetCheckpointInstanceID(m_poConfig->GetMyGroupIdx()) + 1;

        // Delete until the hold window is reached or a delete fails.
        while (llNextToDelete + m_llHoldCount < llCPInstanceID)
        {
            if (!DeleteOne(llNextToDelete))
            {
                PLGErr("delete system fail, instanceid %lu", llNextToDelete);
                break;
            }

            PLGImp("delete one done, instanceid %lu", llNextToDelete);
            llNextToDelete++;
        }

        // A value of 0 here is reported as "no checkpoint".
        if (llCPInstanceID != 0)
        {
            PLGImp("sleep a while, max deleted instanceid %lu checkpoint instanceid %lu now instanceid %lu",
                   llNextToDelete, llCPInstanceID, m_poCheckpointMgr->GetMaxChosenInstanceID());
        }
        else
        {
            PLGImp("sleep a while, max deleted instanceid %lu checkpoint instanceid (no checkpoint) now instanceid %lu",
                   llNextToDelete, m_poCheckpointMgr->GetMaxChosenInstanceID());
        }

        Time::MsSleep(1000);
    }
}
Beispiel #7
0
// Makes sure sDirPath exists, creating it when access(F_OK) reports it missing.
//
// A newly created directory gets rwx for user and group and r-x for others.
// Returns 0 if the path already exists or was created, -1 if mkdir() failed.
int CheckpointReceiver :: CreateDir(const std::string & sDirPath)
{
    const bool bPathExists = (access(sDirPath.c_str(), F_OK) != -1);
    if (bPathExists)
    {
        return 0;
    }

    const int iMkdirRet = mkdir(sDirPath.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
    if (iMkdirRet == -1)
    {
        PLGErr("Create dir fail, path %s", sDirPath.c_str());
        return -1;
    }

    return 0;
}
Beispiel #8
0
void Learner :: OnAskforCheckpoint(const PaxosMsg & oPaxosMsg)
{
    CheckpointSender * poCheckpointSender = GetNewCheckpointSender(oPaxosMsg.nodeid());
    if (poCheckpointSender != nullptr)
    {
        poCheckpointSender->start();
        PLGHead("new checkpoint sender started, send to nodeid %lu", oPaxosMsg.nodeid());
    }
    else
    {
        PLGErr("Checkpoint Sender is running");
    }
}
Beispiel #9
0
// Handles a learn request from another node.
//
// Records the requester's instance id, registers it as a follower when the
// message names this node as proposalnodeid, and ignores the request if the
// requester is not behind this node. Otherwise it replies with
// SendNowInstanceID(), unless the requested instance is still at or above the
// minimum chosen instance id and the learner sender cannot be prepared; in
// that case a single value is sent directly when the requester is exactly one
// instance behind.
void Learner :: OnAskforLearn(const PaxosMsg & oPaxosMsg)
{
    BP->GetLearnerBP()->OnAskforLearn();
    
    PLGHead("START Msg.InstanceID %lu Now.InstanceID %lu Msg.from_nodeid %lu MinChosenInstanceID %lu", 
            oPaxosMsg.instanceid(), GetInstanceID(), oPaxosMsg.nodeid(),
            m_poCheckpointMgr->GetMinChosenInstanceID());
    
    SetSeenInstanceID(oPaxosMsg.instanceid(), oPaxosMsg.nodeid());

    const bool bRequesterFollowsMe = (oPaxosMsg.proposalnodeid() == m_poConfig->GetMyNodeID());
    if (bRequesterFollowsMe)
    {
        PLImp("Found a node %lu follow me.", oPaxosMsg.nodeid());
        m_poConfig->AddFollowerNode(oPaxosMsg.nodeid());
    }
    
    // Nothing to teach a node that is not behind this one.
    if (oPaxosMsg.instanceid() >= GetInstanceID())
    {
        return;
    }

    const bool bWithinLogRange = 
        (oPaxosMsg.instanceid() >= m_poCheckpointMgr->GetMinChosenInstanceID());
    if (bWithinLogRange
            && !m_oLearnerSender.Prepare(oPaxosMsg.instanceid(), oPaxosMsg.nodeid()))
    {
        BP->GetLearnerBP()->OnAskforLearnGetLockFail();

        PLGErr("LearnerSender working for others.");

        const bool bOnlyOneBehind = (oPaxosMsg.instanceid() == (GetInstanceID() - 1));
        if (bOnlyOneBehind)
        {
            PLGImp("InstanceID only difference one, just send this value to other.");

            // Read the stored state of that instance and send its value directly.
            AcceptorStateData oState;
            const int iReadRet = m_oPaxosLog.ReadState(
                    m_poConfig->GetMyGroupIdx(), oPaxosMsg.instanceid(), oState);
            if (iReadRet == 0)
            {
                BallotNumber oBallot(oState.acceptedid(), oState.acceptednodeid());
                SendLearnValue(oPaxosMsg.nodeid(), oPaxosMsg.instanceid(), oBallot, oState.acceptedvalue(), 0);
            }
        }

        return;
    }
    
    SendNowInstanceID(oPaxosMsg.instanceid(), oPaxosMsg.nodeid());
}
Beispiel #10
0
// Accept-phase timeout callback.
//
// When the timeout belongs to the current instance, the proposer goes back to
// Prepare(), passing along whether it was rejected by someone. A timeout
// recorded for a different instance is logged and ignored.
void Proposer :: OnAcceptTimeout()
{
    PLGHead("OK");
    
    if (m_llTimeoutInstanceID == GetInstanceID())
    {
        BP->GetProposerBP()->AcceptTimeout();

        Prepare(m_bWasRejectBySomeone);
        return;
    }

    PLGErr("TimeoutInstanceID %lu not same to NowInstanceID %lu, skip",
            m_llTimeoutInstanceID, GetInstanceID());
}
Beispiel #11
0
// Serializes a CheckpointMsg and wraps it into a packet via PackBaseMsg()
// using the MsgCmd_CheckpointMsg command id.
//
// sBuffer receives the packet on success.
// Returns 0 on success, -1 if the message could not be serialized.
int Base :: PackCheckpointMsg(const CheckpointMsg & oCheckpointMsg, std::string & sBuffer)
{
    std::string sSerializedMsg;
    if (!oCheckpointMsg.SerializeToString(&sSerializedMsg))
    {
        PLGErr("CheckpointMsg.SerializeToString fail, skip this msg");
        return -1;
    }

    const int iCmd = MsgCmd_CheckpointMsg;
    PackBaseMsg(sSerializedMsg, iCmd, sBuffer);

    return 0;
}
Beispiel #12
0
// Handles the confirmation of a learn request: asks the learner sender to
// confirm the (instanceid, nodeid) pair and logs the outcome.
void Learner :: OnComfirmAskForLearn(const PaxosMsg & oPaxosMsg)
{
    BP->GetLearnerBP()->OnComfirmAskForLearn();

    PLGHead("START Msg.InstanceID %lu Msg.from_nodeid %lu", oPaxosMsg.instanceid(), oPaxosMsg.nodeid());

    const bool bConfirmed = m_oLearnerSender.Comfirm(oPaxosMsg.instanceid(), oPaxosMsg.nodeid());
    if (bConfirmed)
    {
        PLGImp("OK, success comfirm");
        return;
    }

    BP->GetLearnerBP()->OnComfirmAskForLearnGetLockFail();

    PLGErr("LearnerSender comfirm fail, maybe is lag msg");
}
Beispiel #13
0
// Replays one instance: reads its stored state from the paxos log and passes
// the accepted value to ExecuteForCheckpoint() of the state machine factory.
//
// Returns false if the state cannot be read or the execution reports failure,
// true otherwise.
bool Replayer :: PlayOne(const uint64_t llInstanceID)
{
    AcceptorStateData oState;
    if (m_oPaxosLog.ReadState(m_poConfig->GetMyGroupIdx(), llInstanceID, oState) != 0)
    {
        return false;
    }

    const bool bExecuted = m_poSMFac->ExecuteForCheckpoint(
            m_poConfig->GetMyGroupIdx(), llInstanceID, oState.acceptedvalue());
    if (bExecuted)
    {
        return true;
    }

    PLGErr("Checkpoint sm excute fail, instanceid %lu", llInstanceID);
    return false;
}
Beispiel #14
0
// Handles the BEGIN message of a checkpoint transfer: sets up a new receiver
// for the sender's (nodeid, uuid) and, if that succeeds, stores the message's
// checkpoint instance id as the minimum chosen instance id.
//
// Returns 0 on success, otherwise the non-zero code of the step that failed.
int Learner :: OnSendCheckpoint_Begin(const CheckpointMsg & oCheckpointMsg)
{
    const int iNewReceiverRet = 
        m_oCheckpointReceiver.NewReceiver(oCheckpointMsg.nodeid(), oCheckpointMsg.uuid());
    if (iNewReceiverRet != 0)
    {
        return iNewReceiverRet;
    }

    PLGImp("NewReceiver ok");

    const int iSetMinRet = 
        m_poCheckpointMgr->SetMinChosenInstanceID(oCheckpointMsg.checkpointinstanceid());
    if (iSetMinRet != 0)
    {
        PLGErr("SetMinChosenInstanceID fail, ret %d CheckpointInstanceID %lu",
                iSetMinRet, oCheckpointMsg.checkpointinstanceid());
    }

    return iSetMinRet;
}
Beispiel #15
0
// Main loop of the replayer.
//
// Starting at the checkpoint instance id + 1, it replays one instance per
// iteration while that instance id is below the maximum chosen instance id.
// The loop exits when m_bIsEnd is set, idles (marking itself paused) while
// m_bCanrun is false, and retries a failed replay after a short sleep.
void Replayer :: run()
{
    PLGHead("Checkpoint.Replayer [START]");
    uint64_t llNextToPlay = m_poSMFac->GetCheckpointInstanceID(m_poConfig->GetMyGroupIdx()) + 1;

    for (;;)
    {
        if (m_bIsEnd)
        {
            PLGHead("Checkpoint.Replayer [END]");
            return;
        }

        if (!m_bCanrun)
        {
            m_bIsPaused = true;
            Time::MsSleep(1000);
            continue;
        }

        // Nothing to replay yet; wait for more chosen instances.
        const bool bCaughtUp = (llNextToPlay >= m_poCheckpointMgr->GetMaxChosenInstanceID());
        if (bCaughtUp)
        {
            Time::MsSleep(1000);
            continue;
        }

        if (PlayOne(llNextToPlay))
        {
            PLGImp("Play one done, instanceid %lu", llNextToPlay);
            llNextToPlay++;
            continue;
        }

        // The same instance is retried on the next iteration.
        PLGErr("Play one fail, instanceid %lu", llNextToPlay);
        Time::MsSleep(500);
    }
}
Beispiel #16
0
void Learner :: OnSendLearnValue(const PaxosMsg & oPaxosMsg)
{
    BP->GetLearnerBP()->OnSendLearnValue();

    PLGHead("START Msg.InstanceID %lu Now.InstanceID %lu Msg.ballot_proposalid %lu Msg.ballot_nodeid %lu Msg.ValueSize %zu",
            oPaxosMsg.instanceid(), GetInstanceID(), oPaxosMsg.proposalid(), 
            oPaxosMsg.nodeid(), oPaxosMsg.value().size());

    if (oPaxosMsg.instanceid() > GetInstanceID())
    {
        PLGDebug("[Latest Msg] i can't learn");
        return;
    }
    else if (oPaxosMsg.instanceid() < GetInstanceID())
    {
        PLGDebug("[Lag Msg] no need to learn");
        return;
    }

    //learn value
    BallotNumber oBallot(oPaxosMsg.proposalid(), oPaxosMsg.proposalnodeid());
    int ret = m_oLearnerState.LearnValue(oPaxosMsg.instanceid(), oBallot, oPaxosMsg.value(), GetLastChecksum());
    if (ret != 0)
    {
        PLGErr("LearnState.LearnValue fail, ret %d", ret);
        return;
    }
    
    PLGHead("END LearnValue OK, proposalid %lu proposalid_nodeid %lu valueLen %zu", 
            oPaxosMsg.proposalid(), oPaxosMsg.nodeid(), oPaxosMsg.value().size());

    if (oPaxosMsg.flag() == PaxosMsgFlagType_SendLearnValue_NeedAck)
    {
        //every time' when receive valid need ack learn value, reset noop timeout.
        Reset_AskforLearn_Noop();

        SendLearnValue_Ack(oPaxosMsg.nodeid());
    }
}
Beispiel #17
0
int LearnerState :: LearnValue(const uint64_t llInstanceID, const BallotNumber & oLearnedBallot, 
        const std::string & sValue, const uint32_t iLastChecksum)
{
    if (llInstanceID > 0 && iLastChecksum == 0)
    {
        m_iNewChecksum = 0;
    }
    else if (sValue.size() > 0)
    {
        m_iNewChecksum = crc32(iLastChecksum, (const uint8_t *)sValue.data(), sValue.size(), CRC32SKIP);
    }
    
    AcceptorStateData oState;
    oState.set_instanceid(llInstanceID);
    oState.set_acceptedvalue(sValue);
    oState.set_promiseid(oLearnedBallot.m_llProposalID);
    oState.set_promisenodeid(oLearnedBallot.m_llNodeID);
    oState.set_acceptedid(oLearnedBallot.m_llProposalID);
    oState.set_acceptednodeid(oLearnedBallot.m_llNodeID);
    oState.set_checksum(m_iNewChecksum);

    WriteOptions oWriteOptions;
    oWriteOptions.bSync = false;

    int ret = m_oPaxosLog.WriteState(oWriteOptions, m_poConfig->GetMyGroupIdx(), llInstanceID, oState);
    if (ret != 0)
    {
        PLGErr("LogStorage.WriteLog fail, InstanceID %lu ValueLen %zu ret %d",
                llInstanceID, sValue.size(), ret);
        return ret;
    }

    LearnValueWithoutWrite(llInstanceID, sValue, m_iNewChecksum);

    PLGDebug("OK, InstanceID %lu ValueLen %zu checksum %u",
            llInstanceID, sValue.size(), m_iNewChecksum);

    return 0;
}
Beispiel #18
0
void Learner :: OnSendCheckpoint(const CheckpointMsg & oCheckpointMsg)
{
    PLGHead("START uuid %lu flag %d sequence %lu cpi %lu checksum %u smid %d offset %lu buffsize %zu filepath %s",
            oCheckpointMsg.uuid(), oCheckpointMsg.flag(), oCheckpointMsg.sequence(), 
            oCheckpointMsg.checkpointinstanceid(), oCheckpointMsg.checksum(), oCheckpointMsg.smid(), 
            oCheckpointMsg.offset(), oCheckpointMsg.buffer().size(), oCheckpointMsg.filepath().c_str());

    int ret = 0;
    
    if (oCheckpointMsg.flag() == CheckpointSendFileFlag_BEGIN)
    {
        ret = OnSendCheckpoint_Begin(oCheckpointMsg);
    }
    else if (oCheckpointMsg.flag() == CheckpointSendFileFlag_ING)
    {
        ret = OnSendCheckpoint_Ing(oCheckpointMsg);
    }
    else if (oCheckpointMsg.flag() == CheckpointSendFileFlag_END)
    {
        ret = OnSendCheckpoint_End(oCheckpointMsg);
    }

    if (ret != 0)
    {
        PLGErr("[FAIL] reset checkpoint receiver and reset askforlearn");

        m_oCheckpointReceiver.Reset();

        Reset_AskforLearn_Noop(5000);
        SendCheckpointAck(oCheckpointMsg.nodeid(), oCheckpointMsg.uuid(), oCheckpointMsg.sequence(), CheckpointSendFileAckFlag_Fail);
    }
    else
    {
        SendCheckpointAck(oCheckpointMsg.nodeid(), oCheckpointMsg.uuid(), oCheckpointMsg.sequence(), CheckpointSendFileAckFlag_OK);
        Reset_AskforLearn_Noop(120000);
    }
}
Beispiel #19
0
// Prepares the receiver for a new checkpoint transfer from the given sender.
//
// Clears the temporary checkpoint data and all log entries of this group,
// forgets which directories were already initialised, and records the sender
// node id and transfer uuid with the sequence counter reset to 0.
//
// Returns 0 on success, otherwise the non-zero code of the step that failed;
// in that case the sender/uuid/sequence fields are left untouched.
int CheckpointReceiver :: NewReceiver(const nodeid_t iSenderNodeID, const uint64_t llUUID)
{
    const int iClearTmpRet = ClearCheckpointTmp();
    if (iClearTmpRet != 0)
    {
        return iClearTmpRet;
    }

    const int iClearLogRet = m_poLogStorage->ClearAllLog(m_poConfig->GetMyGroupIdx());
    if (iClearLogRet != 0)
    {
        PLGErr("ClearAllLog fail, groupidx %d ret %d", 
                m_poConfig->GetMyGroupIdx(), iClearLogRet);
        return iClearLogRet;
    }
    
    m_mapHasInitDir.clear();

    m_llSequence = 0;
    m_llUUID = llUUID;
    m_iSenderNodeID = iSenderNodeID;

    return 0;
}
Beispiel #20
0
// Appends one chunk of a checkpoint file received from the sender.
//
// The message must come from the sender/uuid registered by NewReceiver() and
// carry the next expected sequence number; a message repeating the current
// sequence is treated as already received and accepted without writing. The
// chunk is appended to the file only when the file's current size equals the
// offset stated in the message.
//
// Returns:
//    0  chunk written (or duplicate message skipped)
//   -1  local failure: path initialisation, open, lseek or write failed
//   -2  message rejected: wrong sender/uuid, wrong sequence or offset mismatch
//
// lseek() and write() results are kept in their signed types so that a -1
// failure is detected and reported as such instead of being converted into a
// huge unsigned value.
int CheckpointReceiver :: ReceiveCheckpoint(const CheckpointMsg & oCheckpointMsg)
{
    if (oCheckpointMsg.nodeid() != m_iSenderNodeID
            || oCheckpointMsg.uuid() != m_llUUID)
    {
        PLGErr("msg not valid, Msg.SenderNodeID %lu Receiver.SenderNodeID %lu Msg.UUID %lu Receiver.UUID %lu",
                oCheckpointMsg.nodeid(), m_iSenderNodeID, oCheckpointMsg.uuid(), m_llUUID);
        return -2;
    }

    if (oCheckpointMsg.sequence() == m_llSequence)
    {
        PLGErr("msg already receive, skip, Msg.Sequence %lu Receiver.Sequence %lu",
                oCheckpointMsg.sequence(), m_llSequence);
        return 0;
    }

    if (oCheckpointMsg.sequence() != m_llSequence + 1)
    {
        PLGErr("msg sequence wrong, Msg.Sequence %lu Receiver.Sequence %lu",
                oCheckpointMsg.sequence(), m_llSequence);
        return -2;
    }

    string sFilePath = GetTmpDirPath(oCheckpointMsg.smid()) + "/" + oCheckpointMsg.filepath();
    string sFormatFilePath;
    int ret = InitFilePath(sFilePath, sFormatFilePath);
    if (ret != 0)
    {
        return -1;
    }

    int iFd = open(sFormatFilePath.c_str(), O_CREAT | O_RDWR | O_APPEND, S_IWRITE | S_IREAD);
    if (iFd == -1)
    {
        PLGErr("open file fail, filepath %s", sFormatFilePath.c_str());
        return -1;
    }

    // The current end of file must match the offset the sender claims.
    const off_t llFileOffset = lseek(iFd, 0, SEEK_END);
    if (llFileOffset == (off_t)-1)
    {
        PLGErr("lseek fail, filepath %s", sFormatFilePath.c_str());
        close(iFd);
        return -1;
    }

    if ((uint64_t)llFileOffset != oCheckpointMsg.offset())
    {
        PLGErr("file.offset %zu not equal to msg.offset %lu", (size_t)llFileOffset, oCheckpointMsg.offset());
        close(iFd);
        return -2;
    }

    const auto & sChunk = oCheckpointMsg.buffer();
    const ssize_t iWriteLen = write(iFd, sChunk.data(), sChunk.size());
    if (iWriteLen < 0 || (size_t)iWriteLen != sChunk.size())
    {
        // Covers both an outright failure (-1) and a short write.
        PLGErr("write fail, writelen %zd buffer size %zu", iWriteLen, sChunk.size());
        close(iFd);
        return -1;
    }

    m_llSequence++;
    close(iFd);

    PLGImp("END ok, writelen %zu", (size_t)iWriteLen);

    return 0;
}