示例#1
0
    CGraphProgressHandler() : threaded("CGraphProgressHandler")
    {
        self = queryMyNode()->endpoint();
        stopped = true;

        StringBuffer ipStr;
        queryClusterGroup().queryNode(0).endpoint().getIpText(ipStr);
        sock.setown(ISocket::udp_connect(getFixedPort(getMasterPortBase(), TPORT_watchdog),ipStr.str()));
        progressEnabled = globals->getPropBool("@watchdogProgressEnabled");
        sendData();                         // send initial data
        stopped = false;
#ifdef _WIN32
        threaded.adjustPriority(+1); // it is critical that watchdog packets get through.
#endif
        threaded.init(this);
    }
示例#2
0
static bool RegisterSelf(SocketEndpoint &masterEp)
{
    StringBuffer slfStr;
    StringBuffer masterStr;
    LOG(MCdebugProgress, thorJob, "registering %s - master %s",slfEp.getUrlStr(slfStr).str(),masterEp.getUrlStr(masterStr).str());
    try
    {
        SocketEndpoint ep = masterEp;
        ep.port = getFixedPort(getMasterPortBase(), TPORT_mp);
        Owned<INode> masterNode = createINode(ep);
        CMessageBuffer msg;
        if (!queryWorldCommunicator().recv(msg, masterNode, MPTAG_THORREGISTRATION))
            return false;
        PROGLOG("Initialization received");
        unsigned vmajor, vminor;
        msg.read(vmajor);
        msg.read(vminor);
        if (vmajor != THOR_VERSION_MAJOR || vminor != THOR_VERSION_MINOR)
        {
            replyError(TE_FailedToRegisterSlave, "Thor master/slave version mismatch");
            return false;
        }
        Owned<IGroup> rawGroup = deserializeIGroup(msg);
        globals->Release();
        globals = createPTree(msg);
        mergeCmdParams(globals); // cmd line

        unsigned slavesPerNode = globals->getPropInt("@slavesPerNode", 1);
        unsigned channelsPerSlave = globals->getPropInt("@channelsPerSlave", 1);
        unsigned localThorPortInc = globals->getPropInt("@localThorPortInc", DEFAULT_SLAVEPORTINC);
        unsigned slaveBasePort = globals->getPropInt("@slaveport", DEFAULT_THORSLAVEPORT);
        setClusterGroup(masterNode, rawGroup, slavesPerNode, channelsPerSlave, slaveBasePort, localThorPortInc);

        unsigned numStrands, blockSize;
        if (globals->hasProp("Debug/@forceNumStrands"))
            numStrands = globals->getPropInt("Debug/@forceNumStrands");
        else
        {
            numStrands = defaultForceNumStrands;
            globals->setPropInt("Debug/@forceNumStrands", defaultForceNumStrands);
        }
        if (globals->hasProp("Debug/@strandBlockSize"))
            blockSize = globals->getPropInt("Debug/@strandBlockSize");
        else
        {
            blockSize = defaultStrandBlockSize;
            globals->setPropInt("Debug/@strandBlockSize", defaultStrandBlockSize);
        }
        PROGLOG("Strand defaults: numStrands=%u, blockSize=%u", numStrands, blockSize);

        const char *_masterBuildTag = globals->queryProp("@masterBuildTag");
        const char *masterBuildTag = _masterBuildTag?_masterBuildTag:"no build tag";
        PROGLOG("Master build: %s", masterBuildTag);
        if (!_masterBuildTag || 0 != strcmp(BUILD_TAG, _masterBuildTag))
        {
            StringBuffer errStr("Thor master/slave build mismatch, master = ");
            errStr.append(masterBuildTag).append(", slave = ").append(BUILD_TAG);
            ERRLOG("%s", errStr.str());
#ifndef _DEBUG
            replyError(TE_FailedToRegisterSlave, errStr.str());
            return false;
#endif
        }
        msg.read((unsigned &)masterSlaveMpTag);
        msg.clear();
        msg.setReplyTag(MPTAG_THORREGISTRATION);
        if (!queryNodeComm().reply(msg))
            return false;

        PROGLOG("Registration confirmation sent");
        if (!queryNodeComm().recv(msg, 0, MPTAG_THORREGISTRATION)) // when all registered
            return false;

        ::masterNode = LINK(masterNode);

        PROGLOG("verifying mp connection to rest of cluster");
        if (!queryNodeComm().verifyAll())
            ERRLOG("Failed to connect to all nodes");
        else
            PROGLOG("verified mp connection to rest of cluster");
        LOG(MCdebugProgress, thorJob, "registered %s",slfStr.str());
    }
    catch (IException *e)
    {
        FLLOG(MCexception(e), thorJob, e,"slave registration error");
        e->Release();
        return false;
    }
    return true;
}
示例#3
0
static bool RegisterSelf(SocketEndpoint &masterEp)
{
    StringBuffer slfStr;
    StringBuffer masterStr;
    LOG(MCdebugProgress, thorJob, "registering %s - master %s",slfEp.getUrlStr(slfStr).toCharArray(),masterEp.getUrlStr(masterStr).toCharArray());
    try
    {
        SocketEndpoint ep = masterEp;
        ep.port = getFixedPort(getMasterPortBase(), TPORT_mp);
        Owned<INode> masterNode = createINode(ep);
        CMessageBuffer msg;
        if (!queryWorldCommunicator().recv(msg, masterNode, MPTAG_THORREGISTRATION))
            return false;
        PROGLOG("Initialization received");
        unsigned vmajor, vminor;
        msg.read(vmajor);
        msg.read(vminor);
        if (vmajor != THOR_VERSION_MAJOR || vminor != THOR_VERSION_MINOR)
        {
            replyError("Thor master/slave version mismatch");
            return false;
        }
        Owned<IGroup> group = deserializeIGroup(msg);
        setClusterGroup(group);

        SocketEndpoint myEp = queryMyNode()->endpoint();
        rank_t groupPos = group->rank(queryMyNode());
        if (RANK_NULL == groupPos)
        {
            replyError("Node not part of thorgroup");
            return false;
        }
        if (globals->hasProp("@SLAVENUM") && (mySlaveNum != (unsigned)groupPos))
        {
            VStringBuffer errStr("Slave group rank[%d] does not match provided cmd line slaveNum[%d]", mySlaveNum, (unsigned)groupPos);
            replyError(errStr.str());
            return false;
        }
        globals->Release();
        globals = createPTree(msg);
        mergeCmdParams(globals); // cmd line 

        const char *_masterBuildTag = globals->queryProp("@masterBuildTag");
        const char *masterBuildTag = _masterBuildTag?_masterBuildTag:"no build tag";
        PROGLOG("Master build: %s", masterBuildTag);
#ifndef _DEBUG
        if (!_masterBuildTag || 0 != strcmp(BUILD_TAG, _masterBuildTag))
        {
            StringBuffer errStr("Thor master/slave build mismatch, master = ");
            replyError(errStr.append(masterBuildTag).append(", slave = ").append(BUILD_TAG).str());
            return false;
        }
#endif
        msg.read((unsigned &)masterSlaveMpTag);
        msg.clear();
        msg.setReplyTag(MPTAG_THORREGISTRATION);
        if (!queryClusterComm().reply(msg))
            return false;

        PROGLOG("Registration confirmation sent");
        if (!queryClusterComm().recv(msg, 0, MPTAG_THORREGISTRATION)) // when all registered
            return false;
        PROGLOG("verifying mp connection to rest of cluster");
        if (!queryClusterComm().verifyAll())
            ERRLOG("Failed to connect to all nodes");
        else
            PROGLOG("verified mp connection to rest of cluster");
        ::masterNode = LINK(masterNode);
        LOG(MCdebugProgress, thorJob, "registered %s",slfStr.toCharArray());
    }
    catch (IException *e)
    {
        FLLOG(MCexception(e), thorJob, e,"slave registration error");
        e->Release();
        return false;
    }
    return true;
}