void start() { ActivityTimer s(totalCycles, timeActivities, NULL); dataLinkStart(); eogPending = false; if (container.queryLocal() || firstNode()) { CMessageBuffer reqMsg; reqMsg.setReplyTag(replyTag); reqMsg.append(smt_actMsg); reqMsg.append(container.queryOwner().queryGraphId()); reqMsg.append(container.queryId()); if (!container.queryJob().queryJobComm().sendRecv(reqMsg, 0, container.queryJob().querySlaveMpTag(), LONGTIMEOUT)) throwUnexpected(); masterReplyMsg.swapWith(reqMsg); } }
void doBroadcast() { try { unsigned i = 0; unsigned n; if (1 == nodeindex) n = broadcastSlave-1; else if (broadcastSlave==nodeindex) n = 0; else n = nodeindex-1; loop { unsigned t = target(i++,n); if (t>numnodes) break; if (t != broadcastSlave) { #ifdef _TRACEBROADCAST ActPrintLog(activity, "Broadcast node %d Sending to node %d size %d",nodeindex,t,broadcasting.length()); #endif mptag_t rt = createReplyTag(); broadcasting.setReplyTag(rt); // simulate sendRecv comm->send(broadcasting, t, mpTag); CMessageBuffer rMsg; comm->recv(rMsg, t, rt); #ifdef _TRACEBROADCAST ActPrintLog(activity, "Broadcast node %d Sent to node %d size %d received back %d",nodeindex,t,broadcasting.length(),rMsg.length()); #endif } } } catch (IException *e) { ActPrintLog(activity, e, "CBroadcaster::broadcast exception"); throw; } #ifdef _TRACEBROADCAST ActPrintLog(activity, "do broadcast done done"); #endif }
// Runs this slave's side of the registration handshake with the Thor master.
//
// Sequence (the order of reads from the message matters):
//   1. Wait for the master's initialization message on MPTAG_THORREGISTRATION.
//   2. Read and check the master's major/minor version.
//   3. Deserialize the raw group, replace `globals` with the property tree
//      from the message, and merge the command-line parameters back in.
//   4. Configure the cluster group and make sure the strand defaults are
//      present in `globals`.
//   5. Compare build tags (a mismatch is fatal only when _DEBUG is not defined).
//   6. Read masterSlaveMpTag, send the confirmation reply, then wait for the
//      second message that arrives when all slaves have registered.
//   7. Publish ::masterNode and verify MP connectivity (failure is only logged).
//
// masterEp: endpoint of the master; its port is overridden with the fixed MP port.
// Returns true on success; false on any failed step or caught IException.
static bool RegisterSelf(SocketEndpoint &masterEp)
{
    StringBuffer slfStr;
    StringBuffer masterStr;
    LOG(MCdebugProgress, thorJob, "registering %s - master %s",slfEp.getUrlStr(slfStr).str(),masterEp.getUrlStr(masterStr).str());
    try
    {
        SocketEndpoint masterMpEp = masterEp;
        masterMpEp.port = getFixedPort(getMasterPortBase(), TPORT_mp);
        Owned<INode> mpMaster = createINode(masterMpEp);

        CMessageBuffer regMsg;
        if (!queryWorldCommunicator().recv(regMsg, mpMaster, MPTAG_THORREGISTRATION))
            return false;
        PROGLOG("Initialization received");

        unsigned masterMajor, masterMinor;
        regMsg.read(masterMajor);
        regMsg.read(masterMinor);
        const bool sameVersion = (THOR_VERSION_MAJOR == masterMajor) && (THOR_VERSION_MINOR == masterMinor);
        if (!sameVersion)
        {
            replyError(TE_FailedToRegisterSlave, "Thor master/slave version mismatch");
            return false;
        }

        Owned<IGroup> rawGroup = deserializeIGroup(regMsg);

        // Swap in the master's configuration, then let command-line values override it.
        globals->Release();
        globals = createPTree(regMsg);
        mergeCmdParams(globals); // cmd line

        unsigned slavesPerNode = globals->getPropInt("@slavesPerNode", 1);
        unsigned channelsPerSlave = globals->getPropInt("@channelsPerSlave", 1);
        unsigned localThorPortInc = globals->getPropInt("@localThorPortInc", DEFAULT_SLAVEPORTINC);
        unsigned slaveBasePort = globals->getPropInt("@slaveport", DEFAULT_THORSLAVEPORT);
        setClusterGroup(mpMaster, rawGroup, slavesPerNode, channelsPerSlave, slaveBasePort, localThorPortInc);

        // Strand settings: use the configured value when present, otherwise
        // fall back to the default and record it in globals.
        unsigned numStrands, blockSize;
        if (!globals->hasProp("Debug/@forceNumStrands"))
        {
            numStrands = defaultForceNumStrands;
            globals->setPropInt("Debug/@forceNumStrands", defaultForceNumStrands);
        }
        else
            numStrands = globals->getPropInt("Debug/@forceNumStrands");
        if (!globals->hasProp("Debug/@strandBlockSize"))
        {
            blockSize = defaultStrandBlockSize;
            globals->setPropInt("Debug/@strandBlockSize", defaultStrandBlockSize);
        }
        else
            blockSize = globals->getPropInt("Debug/@strandBlockSize");
        PROGLOG("Strand defaults: numStrands=%u, blockSize=%u", numStrands, blockSize);

        const char *reportedTag = globals->queryProp("@masterBuildTag");
        const char *shownTag = reportedTag ? reportedTag : "no build tag";
        PROGLOG("Master build: %s", shownTag);
        const bool sameBuild = reportedTag && (0 == strcmp(BUILD_TAG, reportedTag));
        if (!sameBuild)
        {
            StringBuffer mismatch("Thor master/slave build mismatch, master = ");
            mismatch.append(shownTag);
            mismatch.append(", slave = ");
            mismatch.append(BUILD_TAG);
            ERRLOG("%s", mismatch.str());
            // When _DEBUG is defined the mismatch is logged but tolerated.
#ifndef _DEBUG
            replyError(TE_FailedToRegisterSlave, mismatch.str());
            return false;
#endif
        }

        regMsg.read((unsigned &)masterSlaveMpTag);

        // Reuse the buffer for the (empty) confirmation reply.
        regMsg.clear();
        regMsg.setReplyTag(MPTAG_THORREGISTRATION);
        if (!queryNodeComm().reply(regMsg))
            return false;
        PROGLOG("Registration confirmation sent");

        if (!queryNodeComm().recv(regMsg, 0, MPTAG_THORREGISTRATION)) // when all registered
            return false;

        ::masterNode = LINK(mpMaster);

        PROGLOG("verifying mp connection to rest of cluster");
        if (queryNodeComm().verifyAll())
            PROGLOG("verified mp connection to rest of cluster");
        else
            ERRLOG("Failed to connect to all nodes");
        LOG(MCdebugProgress, thorJob, "registered %s",slfStr.str());
    }
    catch (IException *e)
    {
        FLLOG(MCexception(e), thorJob, e,"slave registration error");
        e->Release();
        return false;
    }
    return true;
}
// Runs this slave's side of the registration handshake with the Thor master.
//
// Sequence (the order of reads from the message matters):
//   1. Wait for the master's initialization message on MPTAG_THORREGISTRATION.
//   2. Read and check the master's major/minor version.
//   3. Deserialize the cluster group, install it, and check that this node is
//      a member; if @SLAVENUM was supplied, it must equal this node's rank.
//   4. Replace `globals` with the property tree from the message and merge the
//      command-line parameters back in.
//   5. Compare build tags (checked only when _DEBUG is not defined).
//   6. Read masterSlaveMpTag, send the confirmation reply, then wait for the
//      second message that arrives when all slaves have registered.
//   7. Verify MP connectivity (failure is only logged) and publish ::masterNode.
//
// masterEp: endpoint of the master; its port is overridden with the fixed MP port.
// Returns true on success; false on any failed step or caught IException.
static bool RegisterSelf(SocketEndpoint &masterEp)
{
    StringBuffer slfStr;
    StringBuffer masterStr;
    LOG(MCdebugProgress, thorJob, "registering %s - master %s",slfEp.getUrlStr(slfStr).str(),masterEp.getUrlStr(masterStr).str());
    try
    {
        SocketEndpoint ep = masterEp;
        ep.port = getFixedPort(getMasterPortBase(), TPORT_mp);
        Owned<INode> masterNode = createINode(ep);
        CMessageBuffer msg;
        if (!queryWorldCommunicator().recv(msg, masterNode, MPTAG_THORREGISTRATION))
            return false;
        PROGLOG("Initialization received");
        unsigned vmajor, vminor;
        msg.read(vmajor);
        msg.read(vminor);
        if (vmajor != THOR_VERSION_MAJOR || vminor != THOR_VERSION_MINOR)
        {
            replyError("Thor master/slave version mismatch");
            return false;
        }
        Owned<IGroup> group = deserializeIGroup(msg);
        setClusterGroup(group);

        rank_t groupPos = group->rank(queryMyNode());
        if (RANK_NULL == groupPos)
        {
            replyError("Node not part of thorgroup");
            return false;
        }
        // hasProp is evaluated against the pre-handshake globals, i.e. before
        // they are replaced by the master's tree below.
        if (globals->hasProp("@SLAVENUM") && (mySlaveNum != (unsigned)groupPos))
        {
            // Argument order follows the message text: group rank first, then
            // the slave number given on the command line.
            VStringBuffer errStr("Slave group rank[%d] does not match provided cmd line slaveNum[%d]", (unsigned)groupPos, mySlaveNum);
            replyError(errStr.str());
            return false;
        }

        // Swap in the master's configuration, then let command-line values override it.
        globals->Release();
        globals = createPTree(msg);
        mergeCmdParams(globals); // cmd line

        const char *_masterBuildTag = globals->queryProp("@masterBuildTag");
        const char *masterBuildTag = _masterBuildTag?_masterBuildTag:"no build tag";
        PROGLOG("Master build: %s", masterBuildTag);
        // The build-tag check is compiled out when _DEBUG is defined.
#ifndef _DEBUG
        if (!_masterBuildTag || 0 != strcmp(BUILD_TAG, _masterBuildTag))
        {
            StringBuffer errStr("Thor master/slave build mismatch, master = ");
            replyError(errStr.append(masterBuildTag).append(", slave = ").append(BUILD_TAG).str());
            return false;
        }
#endif
        msg.read((unsigned &)masterSlaveMpTag);

        // Reuse the buffer for the (empty) confirmation reply.
        msg.clear();
        msg.setReplyTag(MPTAG_THORREGISTRATION);
        if (!queryClusterComm().reply(msg))
            return false;
        PROGLOG("Registration confirmation sent");

        if (!queryClusterComm().recv(msg, 0, MPTAG_THORREGISTRATION)) // when all registered
            return false;

        PROGLOG("verifying mp connection to rest of cluster");
        if (!queryClusterComm().verifyAll())
            ERRLOG("Failed to connect to all nodes");
        else
            PROGLOG("verified mp connection to rest of cluster");
        ::masterNode = LINK(masterNode);
        LOG(MCdebugProgress, thorJob, "registered %s",slfStr.str());
    }
    catch (IException *e)
    {
        FLLOG(MCexception(e), thorJob, e,"slave registration error");
        e->Release();
        return false;
    }
    return true;
}