CGraphProgressHandler() : threaded("CGraphProgressHandler") { self = queryMyNode()->endpoint(); stopped = true; StringBuffer ipStr; queryClusterGroup().queryNode(0).endpoint().getIpText(ipStr); sock.setown(ISocket::udp_connect(getFixedPort(getMasterPortBase(), TPORT_watchdog),ipStr.str())); progressEnabled = globals->getPropBool("@watchdogProgressEnabled"); sendData(); // send initial data stopped = false; #ifdef _WIN32 threaded.adjustPriority(+1); // it is critical that watchdog packets get through. #endif threaded.init(this); }
static bool RegisterSelf(SocketEndpoint &masterEp) { StringBuffer slfStr; StringBuffer masterStr; LOG(MCdebugProgress, thorJob, "registering %s - master %s",slfEp.getUrlStr(slfStr).str(),masterEp.getUrlStr(masterStr).str()); try { SocketEndpoint ep = masterEp; ep.port = getFixedPort(getMasterPortBase(), TPORT_mp); Owned<INode> masterNode = createINode(ep); CMessageBuffer msg; if (!queryWorldCommunicator().recv(msg, masterNode, MPTAG_THORREGISTRATION)) return false; PROGLOG("Initialization received"); unsigned vmajor, vminor; msg.read(vmajor); msg.read(vminor); if (vmajor != THOR_VERSION_MAJOR || vminor != THOR_VERSION_MINOR) { replyError(TE_FailedToRegisterSlave, "Thor master/slave version mismatch"); return false; } Owned<IGroup> rawGroup = deserializeIGroup(msg); globals->Release(); globals = createPTree(msg); mergeCmdParams(globals); // cmd line unsigned slavesPerNode = globals->getPropInt("@slavesPerNode", 1); unsigned channelsPerSlave = globals->getPropInt("@channelsPerSlave", 1); unsigned localThorPortInc = globals->getPropInt("@localThorPortInc", DEFAULT_SLAVEPORTINC); unsigned slaveBasePort = globals->getPropInt("@slaveport", DEFAULT_THORSLAVEPORT); setClusterGroup(masterNode, rawGroup, slavesPerNode, channelsPerSlave, slaveBasePort, localThorPortInc); unsigned numStrands, blockSize; if (globals->hasProp("Debug/@forceNumStrands")) numStrands = globals->getPropInt("Debug/@forceNumStrands"); else { numStrands = defaultForceNumStrands; globals->setPropInt("Debug/@forceNumStrands", defaultForceNumStrands); } if (globals->hasProp("Debug/@strandBlockSize")) blockSize = globals->getPropInt("Debug/@strandBlockSize"); else { blockSize = defaultStrandBlockSize; globals->setPropInt("Debug/@strandBlockSize", defaultStrandBlockSize); } PROGLOG("Strand defaults: numStrands=%u, blockSize=%u", numStrands, blockSize); const char *_masterBuildTag = globals->queryProp("@masterBuildTag"); const char *masterBuildTag = _masterBuildTag?_masterBuildTag:"no build tag"; PROGLOG("Master build: %s", masterBuildTag); if (!_masterBuildTag || 0 != strcmp(BUILD_TAG, _masterBuildTag)) { StringBuffer errStr("Thor master/slave build mismatch, master = "); errStr.append(masterBuildTag).append(", slave = ").append(BUILD_TAG); ERRLOG("%s", errStr.str()); #ifndef _DEBUG replyError(TE_FailedToRegisterSlave, errStr.str()); return false; #endif } msg.read((unsigned &)masterSlaveMpTag); msg.clear(); msg.setReplyTag(MPTAG_THORREGISTRATION); if (!queryNodeComm().reply(msg)) return false; PROGLOG("Registration confirmation sent"); if (!queryNodeComm().recv(msg, 0, MPTAG_THORREGISTRATION)) // when all registered return false; ::masterNode = LINK(masterNode); PROGLOG("verifying mp connection to rest of cluster"); if (!queryNodeComm().verifyAll()) ERRLOG("Failed to connect to all nodes"); else PROGLOG("verified mp connection to rest of cluster"); LOG(MCdebugProgress, thorJob, "registered %s",slfStr.str()); } catch (IException *e) { FLLOG(MCexception(e), thorJob, e,"slave registration error"); e->Release(); return false; } return true; }
static bool RegisterSelf(SocketEndpoint &masterEp) { StringBuffer slfStr; StringBuffer masterStr; LOG(MCdebugProgress, thorJob, "registering %s - master %s",slfEp.getUrlStr(slfStr).toCharArray(),masterEp.getUrlStr(masterStr).toCharArray()); try { SocketEndpoint ep = masterEp; ep.port = getFixedPort(getMasterPortBase(), TPORT_mp); Owned<INode> masterNode = createINode(ep); CMessageBuffer msg; if (!queryWorldCommunicator().recv(msg, masterNode, MPTAG_THORREGISTRATION)) return false; PROGLOG("Initialization received"); unsigned vmajor, vminor; msg.read(vmajor); msg.read(vminor); if (vmajor != THOR_VERSION_MAJOR || vminor != THOR_VERSION_MINOR) { replyError("Thor master/slave version mismatch"); return false; } Owned<IGroup> group = deserializeIGroup(msg); setClusterGroup(group); SocketEndpoint myEp = queryMyNode()->endpoint(); rank_t groupPos = group->rank(queryMyNode()); if (RANK_NULL == groupPos) { replyError("Node not part of thorgroup"); return false; } if (globals->hasProp("@SLAVENUM") && (mySlaveNum != (unsigned)groupPos)) { VStringBuffer errStr("Slave group rank[%d] does not match provided cmd line slaveNum[%d]", mySlaveNum, (unsigned)groupPos); replyError(errStr.str()); return false; } globals->Release(); globals = createPTree(msg); mergeCmdParams(globals); // cmd line const char *_masterBuildTag = globals->queryProp("@masterBuildTag"); const char *masterBuildTag = _masterBuildTag?_masterBuildTag:"no build tag"; PROGLOG("Master build: %s", masterBuildTag); #ifndef _DEBUG if (!_masterBuildTag || 0 != strcmp(BUILD_TAG, _masterBuildTag)) { StringBuffer errStr("Thor master/slave build mismatch, master = "); replyError(errStr.append(masterBuildTag).append(", slave = ").append(BUILD_TAG).str()); return false; } #endif msg.read((unsigned &)masterSlaveMpTag); msg.clear(); msg.setReplyTag(MPTAG_THORREGISTRATION); if (!queryClusterComm().reply(msg)) return false; PROGLOG("Registration confirmation sent"); if (!queryClusterComm().recv(msg, 0, MPTAG_THORREGISTRATION)) // when all registered return false; PROGLOG("verifying mp connection to rest of cluster"); if (!queryClusterComm().verifyAll()) ERRLOG("Failed to connect to all nodes"); else PROGLOG("verified mp connection to rest of cluster"); ::masterNode = LINK(masterNode); LOG(MCdebugProgress, thorJob, "registered %s",slfStr.toCharArray()); } catch (IException *e) { FLLOG(MCexception(e), thorJob, e,"slave registration error"); e->Release(); return false; } return true; }