int LogMsgLogReceiverThread::run() { while(!done) { try { if(queryWorldCommunicator().recv(in, childNode, MPTAG_JLOG_CHILD_TO_PARENT)) { msgBuffer.deserialize(in, true); if(isListener) listener->report(msgBuffer); else queryLogMsgManager()->report(msgBuffer); } } catch(IException * e) { done = true; CMessageBuffer out; out.append('D').append(cid); try { queryWorldCommunicator().send(out, queryMyNode(), MPTAG_JLOG_CONNECT_TO_PARENT, MP_ASYNC_SEND); } catch(IException * ee) { ee->Release(); } e->Release(); } } return 0; }
// Delivers a subscription result to the subscriber at `dst`.
// The message is tag, sid, the payload length and the payload itself, sent on
// MPTAG_DALI_SUBSCRIPTION_FULFILL with a 3 minute limit.
// Throws (and marks the stub as aborted) when the send reports failure or raises an
// IMP_Exception; throws immediately if the stub was already marked aborted.
void notify(MemoryBuffer &returndata)
{
    if (hasaborted)
        throw MakeStringException(-1,"Subscription notification aborted");

    size32_t dlen = returndata.length();
    CMessageBuffer mb;
    mb.append(tag).append(sid).append(dlen).append(returndata);
    try
    {
        // Must reply in 3 Minutes
        if (!queryWorldCommunicator().send(mb,dst,MPTAG_DALI_SUBSCRIPTION_FULFILL,1000*60*3))
        {
            // Kludge to avoid locking SDS on blocked client
            hasaborted = true;
            StringBuffer tmp;
            throw MakeStringException(-1,"Subscription notification to %s timed out",dst->endpoint().getUrlStr(tmp).str());
        }
    }
    catch (IMP_Exception *e)
    {
        PrintExceptionLog(e,"Dali CSubscriptionStub");
        hasaborted = true;
        throw; // propagate the original exception to the caller
    }
}
// Replies with the contents of wusbuf: the buffer is swapped into msgbuf, sent as the reply
// with a 5 minute limit, and msgbuf is cleared afterwards. Returns what reply() returned.
bool WUSreply()
{
    msgbuf.swapWith(wusbuf);
    const bool replied = queryWorldCommunicator().reply(msgbuf,1000*5*60);
    msgbuf.clear();
    return replied;
}
// Watches MPTAG_THORREGISTRATION for deregistration requests and removes the sending node
// from the registry. Returns when recv() reports failure.
virtual void main()
{
    running = true;
    loop
    {
        INode *senderNode;
        CMessageBuffer msg;
        if (!queryWorldCommunicator().recv(msg, NULL, MPTAG_THORREGISTRATION, &senderNode))
            return;
        rank_t sender = queryClusterGroup().rank(senderNode);
        SocketEndpoint ep = senderNode->endpoint();
        // NOTE(review): recv() appears to hand back a reference owned by the caller; it was
        // never released here. Released now that rank and endpoint have been copied out.
        senderNode->Release();
        ep.port -= THOR_MP_INC;
        StringBuffer url;
        ep.getUrlStr(url);
        if (RANK_NULL == sender)
        {
            PROGLOG("Node %s trying to deregister is not part of this cluster", url.str());
            continue;
        }
        RegistryCode code;
        msg.read((int &)code);
        // Was `!rc_deregister == code`, which negates the constant before comparing.
        if (rc_deregister != code)
            throwUnexpected();
        registry.deregisterNode(sender);
    }
    running = false; // not reached: the loop above only exits through `return`
}
// Watches MPTAG_THORREGISTRATION for deregistration requests. For each request from a node
// of the node group, logs any exception serialized with the request and deregisters the
// node (registry index is rank-1). Returns when recv() reports failure.
virtual void main()
{
    running = true;
    loop
    {
        INode *senderNode;
        CMessageBuffer msg;
        if (!queryWorldCommunicator().recv(msg, NULL, MPTAG_THORREGISTRATION, &senderNode))
            return;
        rank_t sender = queryNodeGroup().rank(senderNode);
        SocketEndpoint ep = senderNode->endpoint();
        // NOTE(review): recv() appears to hand back a reference owned by the caller; it was
        // never released here. Released now that rank and endpoint have been copied out.
        senderNode->Release();
        StringBuffer url;
        ep.getUrlStr(url);
        if (RANK_NULL == sender)
        {
            PROGLOG("Node %s trying to deregister is not part of this cluster", url.str());
            continue;
        }
        RegistryCode code;
        msg.read((int &)code);
        if (rc_deregister != code)
            throwUnexpected();
        Owned<IException> e = deserializeException(msg);
        if (e.get())
            EXCLOG(e, "Slave unregistered with exception");
        registry.deregisterNode(sender-1);
    }
    running = false; // not reached: the loop above only exits through `return`
}
// Sends a deregistration request (rc_deregister followed by the serialized exception `e`)
// to masterNode on MPTAG_THORREGISTRATION, allowing 60 seconds.
// Returns true only when the send succeeded; false if the MP server has not started, the
// send reported failure, or an IException was raised (logged unless jobListenerStopped).
bool UnregisterSelf(IException *e)
{
    if (!hasMPServerStarted())
        return false;

    StringBuffer slaveUrl;
    slfEp.getUrlStr(slaveUrl);
    LOG(MCdebugProgress, thorJob, "Unregistering slave : %s", slaveUrl.str());
    try
    {
        CMessageBuffer request;
        request.append((int)rc_deregister);
        serializeException(e, request); // NB: allows exception to be NULL
        const bool delivered = queryWorldCommunicator().send(request, masterNode, MPTAG_THORREGISTRATION, 60*1000);
        if (delivered)
        {
            LOG(MCdebugProgress, thorJob, "Unregistered slave : %s", slaveUrl.str());
            return true;
        }
        LOG(MCerror, thorJob, "Failed to unregister slave : %s", slaveUrl.str());
        return false;
    }
    catch (IException *sendError)
    {
        if (!jobListenerStopped)
            FLLOG(MCexception(sendError), thorJob, sendError,"slave unregistration error");
        sendError->Release();
    }
    return false;
}
// Stops the watcher if it is marked running: clears the flag, cancels the pending receive on
// MPTAG_THORREGISTRATION and joins the thread. Does nothing otherwise.
void stop()
{
    if (!running)
        return;
    running = false;
    queryWorldCommunicator().cancel(NULL, MPTAG_THORREGISTRATION);
    threaded.join();
}
// Serializes `info` and sends it to `dest` on MPTAG_KEYDIFF.
// The MP server is started on `port` for the duration of the send and is stopped again on
// every exit path, including when the send throws (the exception is then rethrown).
virtual void send(CNodeInfo & info)
{
    CMessageBuffer mb;
    info.serialize(mb);
    startMPServer(port);
    try
    {
        PROGLOG("Sending tlk");
        queryWorldCommunicator().send(mb, dest, MPTAG_KEYDIFF);
    }
    catch (...)
    {
        // previously the server was left running if the send threw
        stopMPServer();
        throw;
    }
    stopMPServer();
}
// Receives one MPTAG_KEYDIFF message into mb and deserializes it into `info`.
// Returns false, leaving `info` untouched, when the communicator's recv() returns false.
virtual bool recv(CNodeInfo & info)
{
    if (!queryWorldCommunicator().recv(mb, 0, MPTAG_KEYDIFF))
        return false;
    info.deserialize(mb);
    return true;
}
// Stops the receiver unless `done` is already set: sets `done`, cancels the pending receive
// from childNode on MPTAG_JLOG_CHILD_TO_PARENT and joins the thread.
void LogMsgLogReceiverThread::stop()
{
    if (done)
        return;
    done = true;
    queryWorldCommunicator().cancel(childNode, MPTAG_JLOG_CHILD_TO_PARENT);
    join();
}
int LogMsgChildReceiverThread::run() { INode * sender; char ctrl; while(!done) { try { if(queryWorldCommunicator().recv(in, 0, MPTAG_JLOG_CONNECT_TO_PARENT, &sender)) { in.read(ctrl); if(ctrl=='A') { MPLogId pid; in.read(pid); MPLogId cid = addChildToManager(pid, sender, false, true); StringBuffer buff; in.clear().append(cid); queryWorldCommunicator().reply(in, MP_ASYNC_SEND); } else if(ctrl=='D') { MPLogId cid; in.read(cid); removeChildFromManager(cid, true); } else ERRLOG("LogMsgChildReceiverThread::run() : unknown control character on received message"); if(sender) sender->Release(); } } catch(IException * e) { EXCLOG(e, "LogMsgChildReceiverThread::run()"); e->Release(); } catch(...) { ERRLOG("LogMsgChildReceiverThread::run() : unknown exception"); } } return 0; }
// Waits for a request on MPTAG_SASHA_REQUEST and deserializes it into this object.
// `timeout` of 0 selects a 5 minute wait. Returns true when a non-empty message was
// received and deserialized; msgbuf is cleared before the wait and after deserializing.
bool accept(unsigned timeout)
{
    msgbuf.clear();
    const unsigned waitMs = timeout ? timeout : (5*60*1000);
    if (!queryWorldCommunicator().recv(msgbuf,NULL,MPTAG_SASHA_REQUEST,NULL,waitMs))
        return false;
    if (!msgbuf.length())
        return false;
    deserialize(msgbuf);
    msgbuf.clear();
    return true;
}
// Sends `msg` to serverNode on MPTAG_FILEVIEW and waits (up to TIMEOUT) for the response,
// which replaces the contents of `msg`.
// Raises FVERR_TimeoutRemoteFileView when sendRecv() returns false, and throws the exception
// deserialized from the head of the response when there is one.
static void sendReceive(INode * serverNode, CMessageBuffer & msg)
{
    const bool answered = queryWorldCommunicator().sendRecv(msg, serverNode, MPTAG_FILEVIEW, TIMEOUT);
    if (!answered)
        throwError(FVERR_TimeoutRemoteFileView);

    msg.setEndian(__BIG_ENDIAN);
    IException * remoteError = deserializeException(msg);
    if (remoteError)
        throw remoteError;
}
static void replyError(unsigned errorCode, const char *errorMsg) { SocketEndpoint myEp = queryMyNode()->endpoint(); StringBuffer str("Node '"); myEp.getUrlStr(str); str.append("' exception: ").append(errorMsg); Owned<IException> e = MakeStringException(errorCode, "%s", str.str()); CMessageBuffer msg; serializeException(e, msg); queryWorldCommunicator().send(msg, 0, MPTAG_THORREGISTRATION); }
// Serializes `filter` and posts it asynchronously to childNode on MPTAG_JLOG_PARENT_TO_CHILD.
// An IException raised by the send is released and otherwise ignored.
void CLogMsgLinkToChild::sendFilter(ILogMsgFilter * filter) const
{
    CMessageBuffer filterMsg;
    filter->serialize(filterMsg, false);
    try
    {
        queryWorldCommunicator().send(filterMsg, childNode, MPTAG_JLOG_PARENT_TO_CHILD, MP_ASYNC_SEND);
    }
    catch (IException * sendError)
    {
        sendError->Release();
    }
}
void CLogMsgLinkToChild::disconnect() { CMessageBuffer out; out.append('D').append(pid); try { queryWorldCommunicator().send(out, childNode, MPTAG_JLOG_CONNECT_TO_CHILD, MP_ASYNC_SEND); } catch(IException * e) { e->Release(); } connected = false; }
// Sends an 'A' message carrying cid to childNode on MPTAG_JLOG_CONNECT_TO_CHILD and waits for
// the response. On success the id in the response is stored in pid and the link is marked
// connected. If the exchange fails (sendRecv() returns false or raises an IException), pid
// and `connected` are left unchanged.
void CLogMsgLinkToChild::connect()
{
    CMessageBuffer out;
    out.append('A').append(cid);
    try
    {
        if(!queryWorldCommunicator().sendRecv(out, childNode, MPTAG_JLOG_CONNECT_TO_CHILD))
            return;
    }
    catch(IException * e)
    {
        // Best effort: swallow the error, but do not go on to read a response that never
        // arrived (previously pid was read from the request buffer and the link was marked
        // connected regardless).
        e->Release();
        return;
    }
    out.read(pid);
    connected = true;
}
// Serializes the response into msgbuf and sends it as the reply (5 minute limit).
// Layout written: id count, each id text, resultoverflow, result count, each result text,
// numdts, then each of the numdts date-times. msgbuf is cleared afterwards.
// Returns what the communicator's reply() returned.
bool reply()
{
    msgbuf.clear();

    unsigned idCount = ids.ordinality();
    msgbuf.append(idCount);
    for (unsigned idIdx = 0; idIdx < idCount; idIdx++)
        msgbuf.append(ids.item(idIdx).text);

    msgbuf.append(resultoverflow);

    unsigned resultCount = results.ordinality();
    msgbuf.append(resultCount);
    for (unsigned resIdx = 0; resIdx < resultCount; resIdx++)
        msgbuf.append(results.item(resIdx).text);

    msgbuf.append(numdts);
    for (unsigned dtIdx = 0; dtIdx < numdts; dtIdx++)
        dts[dtIdx].serialize(msgbuf);

    const bool replied = queryWorldCommunicator().reply(msgbuf,1000*5*60);
    msgbuf.clear();
    return replied;
}
void UnregisterSelf() { StringBuffer slfStr; slfEp.getUrlStr(slfStr); LOG(MCdebugProgress, thorJob, "Unregistering slave : %s", slfStr.toCharArray()); try { CMessageBuffer msg; msg.append((int)rc_deregister); if (!queryWorldCommunicator().send(msg, masterNode, MPTAG_THORREGISTRATION, 60*1000)) { LOG(MCerror, thorJob, "Failed to unregister slave : %s", slfStr.toCharArray()); return; } LOG(MCdebugProgress, thorJob, "Unregistered slave : %s", slfStr.toCharArray()); } catch (IException *e) { FLLOG(MCexception(e), thorJob, e,"slave unregistration error"); e->Release(); } }
// Cancels a pending receive on MPTAG_SASHA_REQUEST (no specific sender given).
void cancelaccept()
{
    queryWorldCommunicator().cancel(NULL,MPTAG_SASHA_REQUEST);
}
// Cancels a pending receive on MPTAG_KEYDIFF (no specific sender given).
virtual void stop()
{
    queryWorldCommunicator().cancel(0, MPTAG_KEYDIFF);
}
// Registers this slave with the master at `masterEp`.
// Sequence, as performed below:
//   1. wait for the master's initialization message on MPTAG_THORREGISTRATION
//   2. check the Thor major/minor version carried in it
//   3. read the raw group and the global property tree, merge command line parameters and
//      set up the cluster group
//   4. fill in strand defaults under Debug/ when they are not present
//   5. compare build tags (a mismatch is only fatal in non-_DEBUG builds)
//   6. read the master/slave MP tag, confirm registration and wait for the final message
//   7. record the master node and verify connections to the rest of the cluster
// Returns true on success; false when any receive/reply fails, on a version (or release
// build tag) mismatch, or when an IException is raised (it is logged and released).
static bool RegisterSelf(SocketEndpoint &masterEp)
{
    StringBuffer slfStr;
    StringBuffer masterStr;
    LOG(MCdebugProgress, thorJob, "registering %s - master %s",slfEp.getUrlStr(slfStr).str(),masterEp.getUrlStr(masterStr).str());
    try
    {
        SocketEndpoint mpEp = masterEp;
        mpEp.port = getFixedPort(getMasterPortBase(), TPORT_mp);
        Owned<INode> masterNode = createINode(mpEp);

        CMessageBuffer msg;
        if (!queryWorldCommunicator().recv(msg, masterNode, MPTAG_THORREGISTRATION))
            return false;
        PROGLOG("Initialization received");

        unsigned masterMajor, masterMinor;
        msg.read(masterMajor);
        msg.read(masterMinor);
        const bool versionMatches = (masterMajor == THOR_VERSION_MAJOR) && (masterMinor == THOR_VERSION_MINOR);
        if (!versionMatches)
        {
            replyError(TE_FailedToRegisterSlave, "Thor master/slave version mismatch");
            return false;
        }

        Owned<IGroup> rawGroup = deserializeIGroup(msg);
        globals->Release();
        globals = createPTree(msg);
        mergeCmdParams(globals); // cmd line

        unsigned slavesPerNode = globals->getPropInt("@slavesPerNode", 1);
        unsigned channelsPerSlave = globals->getPropInt("@channelsPerSlave", 1);
        unsigned localThorPortInc = globals->getPropInt("@localThorPortInc", DEFAULT_SLAVEPORTINC);
        unsigned slaveBasePort = globals->getPropInt("@slaveport", DEFAULT_THORSLAVEPORT);
        setClusterGroup(masterNode, rawGroup, slavesPerNode, channelsPerSlave, slaveBasePort, localThorPortInc);

        // Strand settings: use the configured value when present, otherwise publish the default.
        unsigned numStrands = defaultForceNumStrands;
        if (globals->hasProp("Debug/@forceNumStrands"))
            numStrands = globals->getPropInt("Debug/@forceNumStrands");
        else
            globals->setPropInt("Debug/@forceNumStrands", defaultForceNumStrands);

        unsigned blockSize = defaultStrandBlockSize;
        if (globals->hasProp("Debug/@strandBlockSize"))
            blockSize = globals->getPropInt("Debug/@strandBlockSize");
        else
            globals->setPropInt("Debug/@strandBlockSize", defaultStrandBlockSize);
        PROGLOG("Strand defaults: numStrands=%u, blockSize=%u", numStrands, blockSize);

        const char *_masterBuildTag = globals->queryProp("@masterBuildTag");
        const char *masterBuildTag = _masterBuildTag?_masterBuildTag:"no build tag";
        PROGLOG("Master build: %s", masterBuildTag);
        if (!_masterBuildTag || 0 != strcmp(BUILD_TAG, _masterBuildTag))
        {
            StringBuffer errStr("Thor master/slave build mismatch, master = ");
            errStr.append(masterBuildTag).append(", slave = ").append(BUILD_TAG);
            ERRLOG("%s", errStr.str());
#ifndef _DEBUG
            replyError(TE_FailedToRegisterSlave, errStr.str());
            return false;
#endif
        }

        msg.read((unsigned &)masterSlaveMpTag);
        msg.clear();
        msg.setReplyTag(MPTAG_THORREGISTRATION);
        if (!queryNodeComm().reply(msg))
            return false;
        PROGLOG("Registration confirmation sent");

        // when all registered
        if (!queryNodeComm().recv(msg, 0, MPTAG_THORREGISTRATION))
            return false;

        ::masterNode = LINK(masterNode);
        PROGLOG("verifying mp connection to rest of cluster");
        if (queryNodeComm().verifyAll())
            PROGLOG("verified mp connection to rest of cluster");
        else
            ERRLOG("Failed to connect to all nodes");
        LOG(MCdebugProgress, thorJob, "registered %s",slfStr.str());
    }
    catch (IException *e)
    {
        FLLOG(MCexception(e), thorJob, e,"slave registration error");
        e->Release();
        return false;
    }
    return true;
}
// Sends this command to `node` on MPTAG_SASHA_REQUEST and loads the response into this object.
// `timeout` of 0 selects a 12 hour wait. When sendRecv() raises an IException the exchange is
// retried after disconnecting from the node; the exception is rethrown once the retry budget
// (3) is used up or when the action is SCA_STOP.
// Response handling: ids and results are cleared first. For SCA_WORKUNIT_SERVICES_GET the raw
// response is swapped into wusbuf. Otherwise the response is read in stages, each one only if
// enough bytes remain: ids; then resultoverflow and results; then the date-time array.
// Returns true when a response was received, false when sendRecv() returned false.
bool send(INode *node,unsigned timeout)
{
    const unsigned waitMs = timeout?timeout:12*60*60*1000; // could take a long time!
    unsigned retries = 3;
    loop
    {
        try
        {
            CMessageBuffer mb;
            serialize(mb);
            if (!queryWorldCommunicator().sendRecv(mb,node,MPTAG_SASHA_REQUEST,waitMs))
                break;

            clearIds();
            clearResults();
            if (action==SCA_WORKUNIT_SERVICES_GET)
            {
                mb.swapWith(wusbuf);
            }
            else if (mb.length()-mb.getPos()>=sizeof(unsigned))
            {
                unsigned idCount = 0;
                mb.read(idCount);
                for (unsigned idIdx = 0; idIdx < idCount; idIdx++)
                {
                    StringAttr idText;
                    mb.read(idText);
                    addId(idText.get());
                }
                if (mb.length()-mb.getPos()>=sizeof(unsigned)+sizeof(bool))
                {
                    mb.read(resultoverflow);
                    unsigned resultCount = 0;
                    mb.read(resultCount);
                    for (unsigned resIdx = 0; resIdx < resultCount; resIdx++)
                    {
                        StringAttr resText;
                        mb.read(resText);
                        size32_t resLen = resText.length();
                        results.append(*new StringAttrItem(resText,resLen));
                        resultsize += resLen;
                    }
                    if (mb.length()-mb.getPos()>=sizeof(unsigned))
                    {
                        mb.read(numdts);
                        free(dts);
                        dts = NULL;
                        if (numdts)
                        {
                            dts = (CDateTime *)calloc(numdts,sizeof(CDateTime));
                            for (unsigned dtIdx = 0; dtIdx < numdts; dtIdx++)
                                dts[dtIdx].deserialize(mb);
                        }
                    }
                }
            }
            return true;
        }
        catch (IException *sendError)
        {
            if ((--retries==0)||(action==SCA_STOP))
                throw;
            EXCLOG(sendError,"CSashaCommand send");
            ::Release(sendError);
        }
        try
        {
            // shouldn't really be necessary but make sure socket really closed
            queryWorldCommunicator().disconnect(node);
        }
        catch (IException *disconnectError)
        {
            EXCLOG(disconnectError,"CSashaCommand disconnect");
            ::Release(disconnectError);
        }
    }
    return false;
}
// Handles one diagnostics request and replies through the coven.
// The request is moved out of `mb` (leaving it empty for the response) and starts with an
// int function code. Only MDR_GET_VALUE is handled: it reads an option name, compared
// case-insensitively, and appends that option's output to `mb`. Some options read further
// parameters from the request. An unrecognised option yields "UNKNOWN OPTION: <name>".
// The reply is always sent, even when nothing was appended.
void processMessage(CMessageBuffer &mb)
{
    ICoven &coven=queryCoven();
    MemoryBuffer params;
    params.swapWith(mb);
    int request;
    params.read(request);
    if (request == MDR_GET_VALUE)
    {
        StringAttr option;
        StringBuffer text;
        params.read(option);
        if (0 == stricmp(option,"threads"))
        {
            mb.append(getThreadList(text).str());
        }
        else if (0 == stricmp(option, "mpqueue"))
        {
            mb.append(getReceiveQueueDetails(text).str());
        }
        else if (0 == stricmp(option, "locks"))
        {
            mb.append(querySDS().getLocks(text).str());
        }
        else if (0 == stricmp(option, "sdsstats"))
        {
            mb.append(querySDS().getUsageStats(text).str());
        }
        else if (0 == stricmp(option, "connections"))
        {
            mb.append(querySDS().getConnections(text).str());
        }
        else if (0 == stricmp(option, "sdssubscribers"))
        {
            mb.append(querySDS().getSubscribers(text).str());
        }
        else if (0 == stricmp(option, "clients"))
        {
            mb.append(querySessionManager().getClientProcessList(text).str());
        }
        else if (0 == stricmp(option, "subscriptions"))
        {
            mb.append(getSubscriptionList(text).str());
        }
        else if (0 == stricmp(option, "mpverify"))
        {
            queryWorldCommunicator().verifyAll(text);
            mb.append(text.str());
        }
        else if (0 == stricmp(option, "extconsistency"))
        {
            mb.append(querySDS().getExternalReport(text).str());
        }
        else if (0 == stricmp(option, "build"))
        {
            mb.append("$Id: dadiags.cpp 62376 2011-02-04 21:59:58Z sort $");
        }
        else if (0 == stricmp(option, "sdsfetch"))
        {
            StringAttr branchpath;
            params.read(branchpath);
            Linked<IPropertyTree> sroot = querySDSServer().lockStoreRead();
            // the store read lock is dropped on both the normal and the exceptional path
            try
            {
                sroot->queryPropTree(branchpath)->serialize(mb);
            }
            catch (...)
            {
                querySDSServer().unlockStoreRead();
                throw;
            }
            querySDSServer().unlockStoreRead();
        }
        else if (0 == stricmp(option, "perf"))
        {
            getSystemTraceInfo(text,PerfMonStandard);
            mb.append(text.str());
        }
        else if (0 == stricmp(option, "sdssize"))
        {
            StringAttr branchpath;
            params.read(branchpath);
            Linked<IPropertyTree> sroot = querySDSServer().lockStoreRead();
            StringBuffer sbuf;
            // the store read lock is dropped on both the normal and the exceptional path
            try
            {
                toXML(sroot->queryPropTree(branchpath),sbuf);
                DBGLOG("sdssize '%s' = %d",branchpath.get(),sbuf.length());
            }
            catch (...)
            {
                querySDSServer().unlockStoreRead();
                throw;
            }
            querySDSServer().unlockStoreRead();
            mb.append(sbuf.length());
        }
        else if (0 == stricmp(option, "disconnect"))
        {
            StringAttr client;
            params.read(client);
            SocketEndpoint ep(client);
            PROGLOG("Dalidiag request to close client connection: %s", client.get());
            Owned<INode> node = createINode(ep);
            queryCoven().disconnect(node);
        }
        else if (0 == stricmp(option, "unlock"))
        {
            __int64 connectionId;
            bool disconnect;
            params.read(connectionId);
            params.read(disconnect);
            PROGLOG("Dalidiag request to unlock connection id: %" I64F "x", connectionId);
            StringBuffer connectionInfo;
            bool success = querySDSServer().unlock(connectionId, disconnect, connectionInfo);
            mb.append(success);
            if (success)
                mb.append(connectionInfo);
        }
        else if (0 == stricmp(option, "save"))
        {
            PROGLOG("Dalidiag requests SDS save");
            querySDSServer().saveRequest();
        }
        else if (0 == stricmp(option, "settracetransactions"))
        {
            PROGLOG("Dalidiag requests Trace Transactions");
            if (traceAllTransactions(true))
                mb.append("OK - no change");
            else
                mb.append("OK - transaction tracing enabled");
        }
        else if (0 == stricmp(option, "cleartracetransactions"))
        {
            PROGLOG("Dalidiag requests Trace Transactions stopped");
            if (traceAllTransactions(false))
                mb.append("OK - transaction tracing disabled");
            else
                mb.append("OK - no change");
        }
        else if (0 == stricmp(option, "setldapflags"))
        {
            unsigned f;
            params.read(f);
            PROGLOG("Dalidiag requests setldapflags %d",f);
            querySessionManager().setLDAPflags(f);
        }
        else if (0 == stricmp(option, "getldapflags"))
        {
            unsigned f=querySessionManager().getLDAPflags();
            mb.append(f);
        }
        else if (0 == stricmp(option, "setsdsdebug"))
        {
            PROGLOG("Dalidiag setsdsdebug");
            unsigned p;
            params.read(p);
            StringArray arr;
            while (p--)
            {
                StringAttr s;
                params.read(s);
                arr.append(s);
            }
            StringBuffer reply;
            bool success = querySDSServer().setSDSDebug(arr, reply);
            mb.append(success).append(reply);
        }
        else
            mb.append(StringBuffer("UNKNOWN OPTION: ").append(option).str());
    }
    coven.reply(mb);
}
// Stops the connection receiver: sets `done`, cancels the pending receive on
// MPTAG_JLOG_CONNECT_TO_PARENT and joins the thread. Unlike a guarded stop, this is done
// unconditionally on every call.
void LogMsgChildReceiverThread::stop()
{
    done = true;
    queryWorldCommunicator().cancel(0, MPTAG_JLOG_CONNECT_TO_PARENT);
    join();
}
// Registers this slave with the master at `masterEp`.
// Sequence, as performed below:
//   1. wait for the master's initialization message on MPTAG_THORREGISTRATION
//   2. check the Thor major/minor version carried in it
//   3. read the group, install it as the cluster group and check this node's rank in it
//      (and, when @SLAVENUM was provided, that it matches mySlaveNum)
//   4. replace the global property tree with the master's and merge command line parameters
//   5. compare build tags (non-_DEBUG builds only)
//   6. read the master/slave MP tag, confirm registration and wait for the final message
//   7. verify connections to the rest of the cluster and record the master node
// Returns true on success; false when any receive/reply fails, a check fails (an error is
// sent via replyError), or an IException is raised (it is logged and released).
static bool RegisterSelf(SocketEndpoint &masterEp)
{
    StringBuffer slfStr;
    StringBuffer masterStr;
    LOG(MCdebugProgress, thorJob, "registering %s - master %s",slfEp.getUrlStr(slfStr).toCharArray(),masterEp.getUrlStr(masterStr).toCharArray());
    try
    {
        SocketEndpoint ep = masterEp;
        ep.port = getFixedPort(getMasterPortBase(), TPORT_mp);
        Owned<INode> masterNode = createINode(ep);
        CMessageBuffer msg;
        if (!queryWorldCommunicator().recv(msg, masterNode, MPTAG_THORREGISTRATION))
            return false;
        PROGLOG("Initialization received");

        unsigned vmajor, vminor;
        msg.read(vmajor);
        msg.read(vminor);
        if (vmajor != THOR_VERSION_MAJOR || vminor != THOR_VERSION_MINOR)
        {
            replyError("Thor master/slave version mismatch");
            return false;
        }

        Owned<IGroup> group = deserializeIGroup(msg);
        setClusterGroup(group);
        rank_t groupPos = group->rank(queryMyNode());
        if (RANK_NULL == groupPos)
        {
            replyError("Node not part of thorgroup");
            return false;
        }
        if (globals->hasProp("@SLAVENUM") && (mySlaveNum != (unsigned)groupPos))
        {
            // Arguments follow the order of the placeholders: group rank first, then the
            // command line slave number (they were previously passed the other way round).
            VStringBuffer errStr("Slave group rank[%d] does not match provided cmd line slaveNum[%d]", (unsigned)groupPos, mySlaveNum);
            replyError(errStr.str());
            return false;
        }

        globals->Release();
        globals = createPTree(msg);
        mergeCmdParams(globals); // cmd line

        const char *_masterBuildTag = globals->queryProp("@masterBuildTag");
        const char *masterBuildTag = _masterBuildTag?_masterBuildTag:"no build tag";
        PROGLOG("Master build: %s", masterBuildTag);
#ifndef _DEBUG
        if (!_masterBuildTag || 0 != strcmp(BUILD_TAG, _masterBuildTag))
        {
            StringBuffer errStr("Thor master/slave build mismatch, master = ");
            replyError(errStr.append(masterBuildTag).append(", slave = ").append(BUILD_TAG).str());
            return false;
        }
#endif

        msg.read((unsigned &)masterSlaveMpTag);
        msg.clear();
        msg.setReplyTag(MPTAG_THORREGISTRATION);
        if (!queryClusterComm().reply(msg))
            return false;
        PROGLOG("Registration confirmation sent");

        // when all registered
        if (!queryClusterComm().recv(msg, 0, MPTAG_THORREGISTRATION))
            return false;

        PROGLOG("verifying mp connection to rest of cluster");
        if (!queryClusterComm().verifyAll())
            ERRLOG("Failed to connect to all nodes");
        else
            PROGLOG("verified mp connection to rest of cluster");
        ::masterNode = LINK(masterNode);
        LOG(MCdebugProgress, thorJob, "registered %s",slfStr.toCharArray());
    }
    catch (IException *e)
    {
        FLLOG(MCexception(e), thorJob, e,"slave registration error");
        e->Release();
        return false;
    }
    return true;
}