void process()
{
    CMessageBuffer msg;
    unsigned inputs = container.getInputs();
    unsigned slaves = container.queryJob().querySlaves();
    unsigned s;
    bool readSome=false, slaveReadSome;
    IntArray replyTags;
    for (s=0; s<slaves; s++)
        replyTags.append(0);
    while (inputs>1)
    {
        inputs--;
        for (s=0; s<slaves; s++)
        {
            rank_t sender;
            if (!receiveMsg(msg, RANK_ALL, replyTag, &sender))
                return;
            replyTags.replace(msg.getReplyTag(), ((int)sender)-1);
            msg.read(slaveReadSome);
            if (slaveReadSome)
                readSome = true;
        }
        msg.clear().append(readSome);
        for (s=0; s<slaves; s++)
        {
            if (!queryJobChannel().queryJobComm().send(msg, ((rank_t) s+1), (mptag_t) replyTags.item(s), LONGTIMEOUT))
                throw MakeActivityException(this, 0, "Failed to give result to slave");
        }
        if (readSome) // got some, have told slaves to ignore rest, so finish
            break;
    }
}
void process()
{
    if (!container.queryLocal() && container.queryJob().querySlaves() > 1)
    {
        CMessageBuffer msg;
        unsigned nslaves = container.queryJob().querySlaves();
        unsigned s = 1;
        rowcount_t totalCount = 0, slaveCount;
        for (; s<=nslaves; s++)
        {
            if (!receiveMsg(msg, s, mpTag))
                return;
            msg.read(slaveCount);
            if (RCUNSET == slaveCount)
            {
                totalCount = RCUNSET;
                break; // unknown
            }
            totalCount += slaveCount;
        }
        s = 1;
        msg.clear().append(totalCount);
        for (; s<=nslaves; s++)
            container.queryJob().queryJobComm().send(msg, s, mpTag);
    }
}
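The slave-side half of this count exchange is not shown above. A minimal sketch, assuming the usual Thor slave helpers (queryJobChannel(), receiveMsg(), the shared mpTag) and a locally computed partialCount; the helper name reportCountAndFetchTotal is hypothetical:

// Hypothetical slave-side counterpart (not from the source): report this slave's
// partial count to the master, then read back the cluster-wide total it sends out.
void reportCountAndFetchTotal(rowcount_t partialCount)
{
    CMessageBuffer msg;
    msg.append(partialCount);                             // RCUNSET would signal "unknown"
    queryJobChannel().queryJobComm().send(msg, 0, mpTag); // rank 0 assumed to be the master
    msg.clear();
    if (receiveMsg(msg, 0, mpTag))
    {
        rowcount_t totalCount;
        msg.read(totalCount);                             // total across all slaves (or RCUNSET)
        // ... use totalCount ...
    }
}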
virtual void process()
{
    CChooseSetsActivityMaster::process();
    IHThorChooseSetsArg *helper = (IHThorChooseSetsArg *)queryHelper();
    unsigned numSets = helper->getNumSets();
    unsigned nslaves = container.queryJob().querySlaves();
    MemoryBuffer countMb;
    rowcount_t *counts = (rowcount_t *)countMb.reserveTruncate((numSets*(nslaves+2)) * sizeof(rowcount_t));
    rowcount_t *totals = counts + nslaves*numSets;
    rowcount_t *tallies = totals + numSets;
    memset(counts, 0, countMb.length());
    unsigned s = nslaves;
    CMessageBuffer msg;
    while (s--)
    {
        msg.clear();
        rank_t sender;
        if (!receiveMsg(msg, RANK_ALL, mpTag, &sender))
            return;
        assertex(msg.length() == numSets*sizeof(rowcount_t));
        unsigned set = (unsigned)sender - 1;
        memcpy(&counts[set*numSets], msg.toByteArray(), numSets*sizeof(rowcount_t));
    }
    for (s=0; s<nslaves; s++)
    {
        unsigned i = 0;
        for (; i<numSets; i++)
            totals[i] += counts[s * numSets + i];
    }
    msg.clear();
    msg.append(numSets*sizeof(rowcount_t), totals);
    unsigned endTotalsPos = msg.length();
    for (s=0; s<nslaves; s++)
    {
        msg.rewrite(endTotalsPos);
        msg.append(numSets*sizeof(rowcount_t), tallies);
        container.queryJob().queryJobComm().send(msg, s+1, mpTag);
        unsigned i = 0;
        for (; i<numSets; i++)
            tallies[i] += counts[s * numSets + i];
    }
}
void transferFrom(CMessageBuffer &mb)
{   // endian TBD
    swapWith(mb);
    tag = mb.tag;
    sender = mb.sender;
    replytag = mb.replytag;
    mb.clear();
}
virtual void flushResults(bool complete=false)
{
    if (resultData.length() || complete)
    {
        ActPrintLog("flushing result");
        addResult(numResults, resultData, complete);
        resultData.clear();
        ActPrintLog("result flushed");
    }
}
rowcount_t getCount(CActivityBase &activity, unsigned partialResults, rowcount_t limit, mptag_t mpTag)
{
    rowcount_t totalCount = 0;
    CMessageBuffer msg;
    while (partialResults--)
    {
        rank_t sender;
        msg.clear();
        if (!activity.receiveMsg(msg, RANK_ALL, mpTag, &sender))
            return 0;
        if (activity.queryAbortSoon())
            return 0;
        rowcount_t partialCount;
        msg.read(partialCount);
        totalCount += (rowcount_t)partialCount;
        if (totalCount > limit)
            break;
    }
    return totalCount;
}
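A typical call site for getCount(), sketched as an assumption rather than taken from the source (handleLimitExceeded is a hypothetical placeholder):

// Hypothetical usage (not from the source): aggregate one partial count per slave in
// the master, stopping early once the running total exceeds the limit.
rowcount_t total = getCount(*this, container.queryJob().querySlaves(), limit, mpTag);
if (total > limit)
    handleLimitExceeded(); // hypothetical; note getCount() may not have drained every reply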
void process()
{
    CWorkUnitWriteMasterBase::process();
    unsigned nslaves = container.queryJob().querySlaves();
    CMessageBuffer mb;
    unsigned s = 0;
    for (; s<nslaves; s++)
    {
        loop
        {
            if (!container.queryJob().queryJobComm().send(mb, s+1, mpTag))
                return;
            if (!receiveMsg(mb, s+1, mpTag))
                return;
            if (0 == mb.length())
                break;
            unsigned numGot;
            mb.read(numGot);
            unsigned l = mb.remaining();
            if (workunitWriteLimit && totalSize+resultData.length()+l > workunitWriteLimit)
            {
                StringBuffer errMsg("Dataset too large to output to workunit (limit is set to ");
                errMsg.append(workunitWriteLimit/0x100000).append(" megabytes), in result (");
                if (resultName.length())
                    errMsg.append("name=").append(resultName);
                else
                    errMsg.append("sequence=").append(resultSeq);
                errMsg.append(")");
                throw MakeThorException(TE_WorkUnitWriteLimitExceeded, "%s", errMsg.str());
            }
            resultData.append(l, mb.readDirect(l));
            mb.clear();
            numResults += numGot;
            if (-1 != flushThreshold && resultData.length() >= (unsigned)flushThreshold)
                flushResults();
        }
    }
    flushResults(true);
}
bool receive(MemoryBuffer &mb)
{
#ifdef _TRACEBROADCAST
    ActPrintLog(activity, "Broadcast node %d Receiving on tag %d", nodeindex, (int)mpTag);
#endif
    CMessageBuffer msg;
    rank_t sender;
    BooleanOnOff onOff(receiving);
    if (comm->recv(msg, RANK_ALL, mpTag, &sender))
    {
#ifdef _TRACEBROADCAST
        ActPrintLog(activity, "Broadcast node %d Received %d from %d", nodeindex, msg.length(), sender);
#endif
        try
        {
            mb.swapWith(msg);
            msg.clear(); // send empty reply
#ifdef _TRACEBROADCAST
            ActPrintLog(activity, "Broadcast node %d reply to %d", nodeindex, sender);
#endif
            comm->reply(msg);
            if (aborted)
                return false;
#ifdef _TRACEBROADCAST
            ActPrintLog(activity, "Broadcast node %d Received %d", nodeindex, mb.length());
#endif
        }
        catch (IException *e)
        {
            ActPrintLog(activity, e, "CBroadcaster::recv(2): exception");
            throw;
        }
    }
#ifdef _TRACEBROADCAST
    ActPrintLog(activity, "receive done");
#endif
    return (0 != mb.length());
}
int run()
{
    ICoven &coven = queryCoven();
    CMessageBuffer mb;
    stopped = false;
    CMessageHandler<CDaliDiagnosticsServer> handler("CDaliDiagnosticsServer", this, &CDaliDiagnosticsServer::processMessage);
    while (!stopped)
    {
        try
        {
            mb.clear();
            if (coven.recv(mb, RANK_ALL, MPTAG_DALI_DIAGNOSTICS_REQUEST, NULL))
                handler.handleMessage(mb);
            else
                stopped = true;
        }
        catch (IException *e)
        {
            EXCLOG(e, "CDaliDiagnosticsServer");
            e->Release();
        }
    }
    return 0;
}
int run()
{
    ICoven &coven = queryCoven();
    CMessageHandler<CSessionRequestServer> handler("CSessionRequestServer", this, &CSessionRequestServer::processMessage);
    stopped = false;
    CMessageBuffer mb;
    while (!stopped)
    {
        try
        {
            mb.clear();
            if (coven.recv(mb, RANK_ALL, MPTAG_DALI_SESSION_REQUEST, NULL))
                handler.handleMessage(mb);
            else
                stopped = true;
        }
        catch (IException *e)
        {
            EXCLOG(e, "CDaliPublisherServer");
            e->Release();
        }
    }
    return 0;
}
virtual void process() override
{
    ActPrintLog("INDEXWRITE: Start");
    init();
    IRowStream *stream = inputStream;
    ThorDataLinkMetaInfo info;
    input->getMetaInfo(info);
    outRowAllocator.setown(getRowAllocator(helper->queryDiskRecordSize()));
    start();
    if (refactor)
    {
        assertex(isLocal);
        if (active)
        {
            unsigned targetWidth = partDesc->queryOwner().numParts()-(buildTlk?1:0);
            assertex(0 == container.queryJob().querySlaves() % targetWidth);
            unsigned partsPerNode = container.queryJob().querySlaves() / targetWidth;
            unsigned myPart = queryJobChannel().queryMyRank();
            IArrayOf<IRowStream> streams;
            streams.append(*LINK(stream));
            --partsPerNode;
            // Should this be merging 1,11,21,31 etc.
            unsigned p=0;
            unsigned fromPart = targetWidth+1 + (partsPerNode * (myPart-1));
            for (; p<partsPerNode; p++)
            {
                streams.append(*createRowStreamFromNode(*this, fromPart++, queryJobChannel().queryJobComm(), mpTag, abortSoon));
            }
            ICompare *icompare = helper->queryCompare();
            assertex(icompare);
            Owned<IRowLinkCounter> linkCounter = new CThorRowLinkCounter;
            myInputStream.setown(createRowStreamMerger(streams.ordinality(), streams.getArray(), icompare, false, linkCounter));
            stream = myInputStream;
        }
        else // serve nodes, creating merged parts
            rowServer.setown(createRowServer(this, stream, queryJobChannel().queryJobComm(), mpTag));
    }
    processed = THORDATALINK_STARTED;

    // single part key support
    // has to serially pull all data from nodes 2-N
    // nodes 2-N, could/should start pushing some data (as it's supposed to be small) to cut down on serial nature.
    unsigned node = queryJobChannel().queryMyRank();
    if (singlePartKey)
    {
        if (1 == node)
        {
            try
            {
                open(*partDesc, false, helper->queryDiskRecordSize()->isVariableSize());
                loop
                {
                    OwnedConstThorRow row = inputStream->ungroupedNextRow();
                    if (!row)
                        break;
                    if (abortSoon)
                        return;
                    processRow(row);
                }
                unsigned node = 2;
                while (node <= container.queryJob().querySlaves())
                {
                    Linked<IOutputRowDeserializer> deserializer = ::queryRowDeserializer(input);
                    CMessageBuffer mb;
                    Owned<ISerialStream> stream = createMemoryBufferSerialStream(mb);
                    CThorStreamDeserializerSource rowSource;
                    rowSource.setStream(stream);
                    bool successSR;
                    loop
                    {
                        {
                            BooleanOnOff tf(receivingTag2);
                            successSR = queryJobChannel().queryJobComm().sendRecv(mb, node, mpTag2);
                        }
                        if (successSR)
                        {
                            if (rowSource.eos())
                                break;
                            Linked<IEngineRowAllocator> allocator = ::queryRowAllocator(input);
                            do
                            {
                                RtlDynamicRowBuilder rowBuilder(allocator);
                                size32_t sz = deserializer->deserialize(rowBuilder, rowSource);
                                OwnedConstThorRow fRow = rowBuilder.finalizeRowClear(sz);
                                processRow(fRow);
                            }
                            while (!rowSource.eos());
                        }
                    }
                    node++;
                }
            }
            catch (CATCHALL)
            {
                close(*partDesc, partCrc, true);
                throw;
            }
            close(*partDesc, partCrc, true);
            doStopInput();
        }
        else
        {
            CMessageBuffer mb;
            CMemoryRowSerializer mbs(mb);
            Linked<IOutputRowSerializer> serializer = ::queryRowSerializer(input);
            loop
            {
                BooleanOnOff tf(receivingTag2);
                if (queryJobChannel().queryJobComm().recv(mb, 1, mpTag2)) // node 1 asking for more..
                {
                    if (abortSoon)
                        break;
                    mb.clear();
                    do
                    {
                        OwnedConstThorRow row = inputStream->ungroupedNextRow();
                        if (!row)
                            break;
                        serializer->serialize(mbs, (const byte *)row.get());
                    }
                    while (mb.length() < SINGLEPART_KEY_TRANSFER_SIZE); // NB: at least one row
                    if (!queryJobChannel().queryJobComm().reply(mb))
                        throw MakeThorException(0, "Failed to send index data to node 1, from node %d", node);
                    if (0 == mb.length())
                        break;
                }
            }
        }
    }
static bool RegisterSelf(SocketEndpoint &masterEp)
{
    StringBuffer slfStr;
    StringBuffer masterStr;
    LOG(MCdebugProgress, thorJob, "registering %s - master %s", slfEp.getUrlStr(slfStr).str(), masterEp.getUrlStr(masterStr).str());
    try
    {
        SocketEndpoint ep = masterEp;
        ep.port = getFixedPort(getMasterPortBase(), TPORT_mp);
        Owned<INode> masterNode = createINode(ep);
        CMessageBuffer msg;
        if (!queryWorldCommunicator().recv(msg, masterNode, MPTAG_THORREGISTRATION))
            return false;
        PROGLOG("Initialization received");
        unsigned vmajor, vminor;
        msg.read(vmajor);
        msg.read(vminor);
        if (vmajor != THOR_VERSION_MAJOR || vminor != THOR_VERSION_MINOR)
        {
            replyError(TE_FailedToRegisterSlave, "Thor master/slave version mismatch");
            return false;
        }
        Owned<IGroup> rawGroup = deserializeIGroup(msg);
        globals->Release();
        globals = createPTree(msg);
        mergeCmdParams(globals); // cmd line
        unsigned slavesPerNode = globals->getPropInt("@slavesPerNode", 1);
        unsigned channelsPerSlave = globals->getPropInt("@channelsPerSlave", 1);
        unsigned localThorPortInc = globals->getPropInt("@localThorPortInc", DEFAULT_SLAVEPORTINC);
        unsigned slaveBasePort = globals->getPropInt("@slaveport", DEFAULT_THORSLAVEPORT);
        setClusterGroup(masterNode, rawGroup, slavesPerNode, channelsPerSlave, slaveBasePort, localThorPortInc);
        unsigned numStrands, blockSize;
        if (globals->hasProp("Debug/@forceNumStrands"))
            numStrands = globals->getPropInt("Debug/@forceNumStrands");
        else
        {
            numStrands = defaultForceNumStrands;
            globals->setPropInt("Debug/@forceNumStrands", defaultForceNumStrands);
        }
        if (globals->hasProp("Debug/@strandBlockSize"))
            blockSize = globals->getPropInt("Debug/@strandBlockSize");
        else
        {
            blockSize = defaultStrandBlockSize;
            globals->setPropInt("Debug/@strandBlockSize", defaultStrandBlockSize);
        }
        PROGLOG("Strand defaults: numStrands=%u, blockSize=%u", numStrands, blockSize);
        const char *_masterBuildTag = globals->queryProp("@masterBuildTag");
        const char *masterBuildTag = _masterBuildTag?_masterBuildTag:"no build tag";
        PROGLOG("Master build: %s", masterBuildTag);
        if (!_masterBuildTag || 0 != strcmp(BUILD_TAG, _masterBuildTag))
        {
            StringBuffer errStr("Thor master/slave build mismatch, master = ");
            errStr.append(masterBuildTag).append(", slave = ").append(BUILD_TAG);
            ERRLOG("%s", errStr.str());
#ifndef _DEBUG
            replyError(TE_FailedToRegisterSlave, errStr.str());
            return false;
#endif
        }
        msg.read((unsigned &)masterSlaveMpTag);
        msg.clear();
        msg.setReplyTag(MPTAG_THORREGISTRATION);
        if (!queryNodeComm().reply(msg))
            return false;
        PROGLOG("Registration confirmation sent");
        if (!queryNodeComm().recv(msg, 0, MPTAG_THORREGISTRATION)) // when all registered
            return false;
        ::masterNode = LINK(masterNode);
        PROGLOG("verifying mp connection to rest of cluster");
        if (!queryNodeComm().verifyAll())
            ERRLOG("Failed to connect to all nodes");
        else
            PROGLOG("verified mp connection to rest of cluster");
        LOG(MCdebugProgress, thorJob, "registered %s", slfStr.str());
    }
    catch (IException *e)
    {
        FLLOG(MCexception(e), thorJob, e, "slave registration error");
        e->Release();
        return false;
    }
    return true;
}
bool SortSlaveMP::marshall(ISortSlaveMP &slave, ICommunicator* comm, mptag_t tag)
{
    CMessageBuffer mb;
    rank_t sender;
    comm->recv(mb,0,tag,&sender); // NB only recv from master
    if (mb.length()==0)
    {
        PROGLOG("Stopping SortSlaveMP::marshall");
        return false;
    }
    byte fn;
    mb.read(fn);
    CMessageBuffer mbout;
    mbout.init(mb.getSender(),tag,mb.getReplyTag());
    byte okout=1;
    mbout.append(okout);
#ifdef FULLTRACE
    StringBuffer tmp1;
    PROGLOG(">SortSlaveMP::marshall(%d) got %d from %s tag %d replytag %d",(int)fn, mb.length(), mb.getSender().getUrlStr(tmp1).str(),tag,mb.getReplyTag());
#endif
    bool replydone = false;
    Owned<IException> err;
    try
    {
        switch ((MPSlaveFunctions)(int)fn)
        {
        case FN_Connect:
            {
                unsigned _part;
                unsigned _numnodes;
                mb.read(_part).read(_numnodes);
                bool ret = slave.Connect(_part,_numnodes);
                mbout.append(ret);
            }
            break;
        case FN_StartGather:
            {
                slave.StartGather();
            }
            break;
        case FN_GetGatherInfo:
            {
                bool hasserializer;
                mb.read(hasserializer);
                rowcount_t numlocal;
                unsigned overflowscale;
                offset_t totalsize;
                slave.GetGatherInfo(numlocal,totalsize,overflowscale,hasserializer);
                mbout.append(numlocal).append(totalsize).append(overflowscale);
            }
            break;
        case FN_GetMinMax:
            {
                size32_t keybuffsize;
                void *keybuff;
                size32_t avrecsize;
                rowcount_t ret = slave.GetMinMax(keybuffsize,keybuff,avrecsize);
                serializeblk(mbout,keybuffsize,keybuff).append(avrecsize).append(ret);
                free(keybuff);
            }
            break;
        case FN_GetMultiMidPointStart:
            {
                replydone = true;
                comm->reply(mbout);
                size32_t lkeybuffsize;
                void * lkeybuff;
                size32_t hkeybuffsize;
                void * hkeybuff;
                deserializeblk(mb,lkeybuffsize,lkeybuff);
                deserializeblk(mb,hkeybuffsize,hkeybuff);
                slave.GetMultiMidPointStart(lkeybuffsize,lkeybuff,hkeybuffsize,hkeybuff);
                free(lkeybuff);
                free(hkeybuff);
            }
            break;
        case FN_MultiBinChopStop:
            {
                unsigned num;
                mb.read(num);
                void *out = mbout.reserveTruncate(num*sizeof(rowcount_t));
                slave.MultiBinChopStop(num,(rowcount_t *)out);
            }
            break;
        case FN_GetMultiMidPointStop:
            {
                size32_t mkeybuffsize=0;
                void * mkeybuff = NULL;
                slave.GetMultiMidPointStop(mkeybuffsize,mkeybuff);
                serializeblk(mbout,mkeybuffsize,mkeybuff);
                free(mkeybuff);
            }
            break;
        case FN_MultiBinChopStart:
            {
                replydone = true;
                comm->reply(mbout);
                size32_t keybuffsize;
                void * keybuff;
                deserializeblk(mb,keybuffsize,keybuff);
                byte cmpfn;
                mb.read(cmpfn);
                slave.MultiBinChopStart(keybuffsize,(const byte *)keybuff,cmpfn);
                free(keybuff);
            }
            break;
        case FN_MultiBinChop:
            {
                size32_t keybuffsize;
                void * keybuff;
                deserializeblk(mb,keybuffsize,keybuff);
                unsigned num;
                byte cmpfn;
                mb.read(num).read(cmpfn);
                void *out = mbout.reserveTruncate(num*sizeof(rowcount_t));
                slave.MultiBinChop(keybuffsize,(const byte *)keybuff,num,(rowcount_t *)out,cmpfn);
                free(keybuff);
            }
            break;
        case FN_OverflowAdjustMapStart:
            {
                replydone = true;
                comm->reply(mbout);
                unsigned mapsize;
                mb.read(mapsize);
                const void * map = mb.readDirect(mapsize*sizeof(rowcount_t));
                size32_t keybuffsize;
                void * keybuff;
                deserializeblk(mb,keybuffsize,keybuff);
                byte cmpfn;
                mb.read(cmpfn);
                bool useaux;
                mb.read(useaux);
                slave.OverflowAdjustMapStart(mapsize,(rowcount_t *)map,keybuffsize,(const byte *)keybuff,cmpfn,useaux);
                free(keybuff);
            }
            break;
        case FN_OverflowAdjustMapStop:
            {
                unsigned mapsize;
                mb.read(mapsize);
                rowcount_t ret=0;
                size32_t retofs = mbout.length();
                mbout.append(ret);
                void *map=mbout.reserveTruncate(mapsize*sizeof(rowcount_t));
                ret = slave.OverflowAdjustMapStop(mapsize,(rowcount_t *)map); // could avoid copy here if passed mb
                mbout.writeDirect(retofs,sizeof(ret),&ret);
            }
            break;
        case FN_MultiMerge:
            {
                replydone = true;
                comm->reply(mbout);
                unsigned mapsize;
                mb.read(mapsize);
                const void *map = mb.readDirect(mapsize*sizeof(rowcount_t));
                unsigned num;
                mb.read(num);
                SocketEndpointArray epa;
                for (unsigned i=0;i<num;i++)
                {
                    SocketEndpoint ep;
                    ep.deserialize(mb);
                    epa.append(ep);
                }
                slave.MultiMerge(mapsize,(rowcount_t *)map,num,epa.getArray());
            }
            break;
        case FN_MultiMergeBetween:
            {
                replydone = true;
                comm->reply(mbout);
                unsigned mapsize;
                mb.read(mapsize);
                const void *map = mb.readDirect(mapsize*sizeof(rowcount_t));
                const void *mapupper = mb.readDirect(mapsize*sizeof(rowcount_t));
                unsigned num;
                mb.read(num);
                SocketEndpointArray epa;
                for (unsigned i=0;i<num;i++)
                {
                    SocketEndpoint ep;
                    ep.deserialize(mb);
                    epa.append(ep);
                }
                slave.MultiMergeBetween(mapsize,(rowcount_t *)map,(rowcount_t *)mapupper,num,epa.getArray());
            }
            break;
        case FN_SingleMerge:
            {
                replydone = true;
                comm->reply(mbout); // async
                slave.SingleMerge();
            }
            break;
        case FN_FirstRowOfFile:
            {
                StringAttr filename;
                mb.read(filename);
                size32_t rowbufsize = 0;
                byte *rowbuf = NULL;
                bool ret = slave.FirstRowOfFile(filename,rowbufsize,rowbuf);
                serializeblk(mbout,rowbufsize,rowbuf);
                free(rowbuf);
                mbout.append(ret);
            }
            break;
        case FN_GetMultiNthRow:
            {
                unsigned numsplits;
                mb.read(numsplits);
                size32_t mkeybuffsize = 0;
                void * mkeybuf = NULL;
                slave.GetMultiNthRow(numsplits,mkeybuffsize,mkeybuf);
                serializeblk(mbout,mkeybuffsize,mkeybuf);
                free(mkeybuf);
            }
            break;
        case FN_StartMiniSort:
            {
                replydone = true;
                rowcount_t totalrows;
                mb.read(totalrows);
                comm->reply(mbout); // async
                slave.StartMiniSort(totalrows);
            }
            break;
        case FN_Close:
            {
                replydone = true;
                comm->reply(mbout); // async
                slave.Close();
            }
            break;
        case FN_CloseWait:
            {
                slave.CloseWait();
            }
            break;
        case FN_Disconnect:
            {
                comm->reply(mbout); // async
                replydone = true;
                slave.Disconnect();
            } // fall through
            return false;
        default:
            throw MakeStringException(-1,"unknown function %d",(int)fn);
        }
    }
    catch (IException *e)
    {
        EXCLOG(e,"SortSlaveMP::marshall");
        if (!replydone)
        {
            mbout.clear();
            okout = 0;
            mbout.append(okout);
            int err = e->errorCode();
            mbout.append(err);
            StringBuffer outs;
            e->errorMessage(outs);
            mbout.append(outs.str());
        }
        err.setown(e);
    }
    if (!replydone)
    {
#ifdef FULLTRACE
        StringBuffer tmp1;
        PROGLOG("<SortSlaveMP::marshall(%d) send %d to %s tag %d",(int)fn, mbout.length(), mbout.getSender().getUrlStr(tmp1).str(),mbout.getReplyTag());
#endif
        comm->reply(mbout);
    }
    if (err.get())
        throw err.getClear();
    return true;
}
static bool RegisterSelf(SocketEndpoint &masterEp)
{
    StringBuffer slfStr;
    StringBuffer masterStr;
    LOG(MCdebugProgress, thorJob, "registering %s - master %s", slfEp.getUrlStr(slfStr).toCharArray(), masterEp.getUrlStr(masterStr).toCharArray());
    try
    {
        SocketEndpoint ep = masterEp;
        ep.port = getFixedPort(getMasterPortBase(), TPORT_mp);
        Owned<INode> masterNode = createINode(ep);
        CMessageBuffer msg;
        if (!queryWorldCommunicator().recv(msg, masterNode, MPTAG_THORREGISTRATION))
            return false;
        PROGLOG("Initialization received");
        unsigned vmajor, vminor;
        msg.read(vmajor);
        msg.read(vminor);
        if (vmajor != THOR_VERSION_MAJOR || vminor != THOR_VERSION_MINOR)
        {
            replyError("Thor master/slave version mismatch");
            return false;
        }
        Owned<IGroup> group = deserializeIGroup(msg);
        setClusterGroup(group);
        SocketEndpoint myEp = queryMyNode()->endpoint();
        rank_t groupPos = group->rank(queryMyNode());
        if (RANK_NULL == groupPos)
        {
            replyError("Node not part of thorgroup");
            return false;
        }
        if (globals->hasProp("@SLAVENUM") && (mySlaveNum != (unsigned)groupPos))
        {
            VStringBuffer errStr("Slave group rank[%d] does not match provided cmd line slaveNum[%d]", mySlaveNum, (unsigned)groupPos);
            replyError(errStr.str());
            return false;
        }
        globals->Release();
        globals = createPTree(msg);
        mergeCmdParams(globals); // cmd line
        const char *_masterBuildTag = globals->queryProp("@masterBuildTag");
        const char *masterBuildTag = _masterBuildTag?_masterBuildTag:"no build tag";
        PROGLOG("Master build: %s", masterBuildTag);
#ifndef _DEBUG
        if (!_masterBuildTag || 0 != strcmp(BUILD_TAG, _masterBuildTag))
        {
            StringBuffer errStr("Thor master/slave build mismatch, master = ");
            replyError(errStr.append(masterBuildTag).append(", slave = ").append(BUILD_TAG).str());
            return false;
        }
#endif
        msg.read((unsigned &)masterSlaveMpTag);
        msg.clear();
        msg.setReplyTag(MPTAG_THORREGISTRATION);
        if (!queryClusterComm().reply(msg))
            return false;
        PROGLOG("Registration confirmation sent");
        if (!queryClusterComm().recv(msg, 0, MPTAG_THORREGISTRATION)) // when all registered
            return false;
        PROGLOG("verifying mp connection to rest of cluster");
        if (!queryClusterComm().verifyAll())
            ERRLOG("Failed to connect to all nodes");
        else
            PROGLOG("verified mp connection to rest of cluster");
        ::masterNode = LINK(masterNode);
        LOG(MCdebugProgress, thorJob, "registered %s", slfStr.toCharArray());
    }
    catch (IException *e)
    {
        FLLOG(MCexception(e), thorJob, e, "slave registration error");
        e->Release();
        return false;
    }
    return true;
}
void clear() { broadcasting.clear(); }
void processMessage(CMessageBuffer &mb)
{
    ICoven &coven = queryCoven();
    SessionId id;
    int fn;
    mb.read(fn);
    switch (fn)
    {
    case MSR_REGISTER_PROCESS_SESSION:
        {
            acceptConnections.wait();
            acceptConnections.signal();
            Owned<INode> node(deserializeINode(mb));
            Owned<INode> servernode(deserializeINode(mb)); // hopefully me, but not if forwarded
            int role=0;
            if (mb.length()-mb.getPos()>=sizeof(role))
            {   // a capability block present
                mb.read(role);
                if (!manager.authorizeConnection(role,false))
                {
                    SocketEndpoint sender = mb.getSender();
                    mb.clear();
                    coven.reply(mb);
                    MilliSleep(100+getRandom()%1000); // Causes client to 'work' for a short time.
                    Owned<INode> node = createINode(sender);
                    coven.disconnect(node);
                    break;
                }
#ifdef _DEBUG
                StringBuffer eps;
                PROGLOG("Connection to %s authorized",mb.getSender().getUrlStr(eps).str());
#endif
            }
            IGroup *covengrp;
            id = manager.registerClientProcess(node.get(),covengrp,(DaliClientRole)role);
            mb.clear().append(id);
            if (covengrp->rank(servernode)==RANK_NULL)
            {   // must have been redirected
                covengrp->Release(); // no good, so just use one we know about (may use something more sophisticated later)
                INode *na = servernode.get();
                covengrp = createIGroup(1, &na);
            }
            covengrp->serialize(mb);
            covengrp->Release();
            coven.reply(mb);
        }
        break;
    case MSR_SECONDARY_REGISTER_PROCESS_SESSION:
        {
            mb.read(id);
            Owned<INode> node (deserializeINode(mb));
            int role;
            mb.read(role);
            manager.addProcessSession(id,node.get(),(DaliClientRole)role);
            mb.clear();
            coven.reply(mb);
        }
        break;
    case MSR_REGISTER_SESSION:
        {
            SecurityToken tok;
            SessionId parentid;
            mb.read(tok).read(parentid);
            SessionId id = manager.registerSession(tok,parentid);
            mb.clear().append(id);
            coven.reply(mb);
        }
        break;
    case MSR_SECONDARY_REGISTER_SESSION:
        {
            mb.read(id);
            manager.addSession(id);
            mb.clear();
            coven.reply(mb);
        }
        break;
    case MSR_LOOKUP_PROCESS_SESSION:
        {   // looks up from node or from id
            Owned<INode> node (deserializeINode(mb));
            if (node->endpoint().isNull()&&(mb.length()-mb.getPos()>=sizeof(id)))
            {
                mb.read(id);
                INode *n = manager.getProcessSessionNode(id);
                if (n)
                    node.setown(n);
                node->serialize(mb.clear());
            }
            else
            {
                id = manager.lookupProcessSession(node.get());
                mb.clear().append(id);
            }
            coven.reply(mb);
        }
        break;
    case MSR_STOP_SESSION:
        {
            SessionId sessid;
            bool failed;
            mb.read(sessid).read(failed);
            manager.stopSession(sessid,failed);
            mb.clear();
            coven.reply(mb);
        }
        break;
    case MSR_LOOKUP_LDAP_PERMISSIONS:
        {
            StringAttr key;
            StringAttr obj;
            Owned<IUserDescriptor> udesc=createUserDescriptor();
            StringAttr username;
            StringAttr passwordenc;
            mb.read(key).read(obj);
            udesc->deserialize(mb);
#ifndef _NO_DALIUSER_STACKTRACE
            //following debug code to be removed
            StringBuffer sb;
            udesc->getUserName(sb);
            if (0==sb.length())
            {
                DBGLOG("UNEXPECTED USER (NULL) in dasess.cpp CSessionRequestServer::processMessage() line %d", __LINE__);
            }
#endif
            unsigned auditflags = 0;
            if (mb.length()-mb.getPos()>=sizeof(auditflags))
                mb.read(auditflags);
            int err = 0;
            int ret = manager.getPermissionsLDAP(key,obj,udesc,auditflags,&err);
            mb.clear().append(ret);
            if (err)
                mb.append(err);
            coven.reply(mb);
        }
        break;
    }
}
virtual void process() override
{
    ActPrintLog("INDEXWRITE: Start");
    init();
    IRowStream *stream = inputStream;
    ThorDataLinkMetaInfo info;
    input->getMetaInfo(info);
    outRowAllocator.setown(getRowAllocator(helper->queryDiskRecordSize()));
    start();
    if (refactor)
    {
        assertex(isLocal);
        if (active)
        {
            unsigned targetWidth = partDesc->queryOwner().numParts()-(buildTlk?1:0);
            assertex(0 == container.queryJob().querySlaves() % targetWidth);
            unsigned partsPerNode = container.queryJob().querySlaves() / targetWidth;
            unsigned myPart = queryJobChannel().queryMyRank();
            IArrayOf<IRowStream> streams;
            streams.append(*LINK(stream));
            --partsPerNode;
            // Should this be merging 1,11,21,31 etc.
            unsigned p=0;
            unsigned fromPart = targetWidth+1 + (partsPerNode * (myPart-1));
            for (; p<partsPerNode; p++)
            {
                streams.append(*createRowStreamFromNode(*this, fromPart++, queryJobChannel().queryJobComm(), mpTag, abortSoon));
            }
            ICompare *icompare = helper->queryCompare();
            assertex(icompare);
            Owned<IRowLinkCounter> linkCounter = new CThorRowLinkCounter;
            myInputStream.setown(createRowStreamMerger(streams.ordinality(), streams.getArray(), icompare, false, linkCounter));
            stream = myInputStream;
        }
        else // serve nodes, creating merged parts
            rowServer.setown(createRowServer(this, stream, queryJobChannel().queryJobComm(), mpTag));
    }
    processed = THORDATALINK_STARTED;

    // single part key support
    // has to serially pull all data from nodes 2-N
    // nodes 2-N, could/should start pushing some data (as it's supposed to be small) to cut down on serial nature.
    unsigned node = queryJobChannel().queryMyRank();
    if (singlePartKey)
    {
        if (1 == node)
        {
            try
            {
                open(*partDesc, false, helper->queryDiskRecordSize()->isVariableSize());
                for (;;)
                {
                    OwnedConstThorRow row = inputStream->ungroupedNextRow();
                    if (!row)
                        break;
                    if (abortSoon)
                        return;
                    processRow(row);
                }
                unsigned node = 2;
                while (node <= container.queryJob().querySlaves())
                {
                    Linked<IOutputRowDeserializer> deserializer = ::queryRowDeserializer(input);
                    CMessageBuffer mb;
                    Owned<ISerialStream> stream = createMemoryBufferSerialStream(mb);
                    CThorStreamDeserializerSource rowSource;
                    rowSource.setStream(stream);
                    bool successSR;
                    for (;;)
                    {
                        {
                            BooleanOnOff tf(receivingTag2);
                            successSR = queryJobChannel().queryJobComm().sendRecv(mb, node, mpTag2);
                        }
                        if (successSR)
                        {
                            if (rowSource.eos())
                                break;
                            Linked<IEngineRowAllocator> allocator = ::queryRowAllocator(input);
                            do
                            {
                                RtlDynamicRowBuilder rowBuilder(allocator);
                                size32_t sz = deserializer->deserialize(rowBuilder, rowSource);
                                OwnedConstThorRow fRow = rowBuilder.finalizeRowClear(sz);
                                processRow(fRow);
                            }
                            while (!rowSource.eos());
                        }
                    }
                    node++;
                }
            }
            catch (CATCHALL)
            {
                close(*partDesc, partCrc, true);
                throw;
            }
            close(*partDesc, partCrc, true);
            stop();
        }
        else
        {
            CMessageBuffer mb;
            CMemoryRowSerializer mbs(mb);
            Linked<IOutputRowSerializer> serializer = ::queryRowSerializer(input);
            for (;;)
            {
                BooleanOnOff tf(receivingTag2);
                if (queryJobChannel().queryJobComm().recv(mb, 1, mpTag2)) // node 1 asking for more..
                {
                    if (abortSoon)
                        break;
                    mb.clear();
                    do
                    {
                        OwnedConstThorRow row = inputStream->ungroupedNextRow();
                        if (!row)
                            break;
                        serializer->serialize(mbs, (const byte *)row.get());
                    }
                    while (mb.length() < SINGLEPART_KEY_TRANSFER_SIZE); // NB: at least one row
                    if (!queryJobChannel().queryJobComm().reply(mb))
                        throw MakeThorException(0, "Failed to send index data to node 1, from node %d", node);
                    if (0 == mb.length())
                        break;
                }
            }
        }
    }
    else
    {
        if (!refactor || active)
        {
            try
            {
                StringBuffer partFname;
                getPartFilename(*partDesc, 0, partFname);
                ActPrintLog("INDEXWRITE: process: handling fname : %s", partFname.str());
                open(*partDesc, false, helper->queryDiskRecordSize()->isVariableSize());
                ActPrintLog("INDEXWRITE: write");
                BooleanOnOff tf(receiving);
                if (!refactor || !active)
                    receiving = false;
                do
                {
                    OwnedConstThorRow row = inputStream->ungroupedNextRow();
                    if (!row)
                        break;
                    processRow(row);
                }
                while (!abortSoon);
                ActPrintLog("INDEXWRITE: write level 0 complete");
            }
            catch (CATCHALL)
            {
                close(*partDesc, partCrc, isLocal && !buildTlk && 1 == node);
                throw;
            }
            close(*partDesc, partCrc, isLocal && !buildTlk && 1 == node);
            stop();
            ActPrintLog("INDEXWRITE: Wrote %" RCPF "d records", processed & THORDATALINK_COUNT_MASK);
            if (buildTlk)
            {
                ActPrintLog("INDEXWRITE: sending rows");
                NodeInfoArray tlkRows;
                CMessageBuffer msg;
                if (firstNode())
                {
                    if (processed & THORDATALINK_COUNT_MASK)
                    {
                        if (enableTlkPart0)
                            tlkRows.append(* new CNodeInfo(0, firstRow.get(), firstRowSize, totalCount));
                        tlkRows.append(* new CNodeInfo(1, lastRow.get(), lastRowSize, totalCount));
                    }
                }
                else
                {
                    if (processed & THORDATALINK_COUNT_MASK)
                    {
                        CNodeInfo row(queryJobChannel().queryMyRank(), lastRow.get(), lastRowSize, totalCount);
                        row.serialize(msg);
                    }
                    queryJobChannel().queryJobComm().send(msg, 1, mpTag);
                }
                if (firstNode())
                {
                    ActPrintLog("INDEXWRITE: Waiting on tlk to complete");
                    // JCSMORE if refactor==true, is rowsToReceive here right??
                    unsigned rowsToReceive = (refactor ? (tlkDesc->queryOwner().numParts()-1) : container.queryJob().querySlaves()) -1; // -1 'cos got my own in array already
                    ActPrintLog("INDEXWRITE: will wait for info from %d slaves before writing TLK", rowsToReceive);
                    while (rowsToReceive--)
                    {
                        msg.clear();
                        receiveMsg(msg, RANK_ALL, mpTag); // NH->JCS RANK_ALL_OTHER not supported for recv
                        if (abortSoon)
                            return;
                        if (msg.length())
                        {
                            CNodeInfo *ni = new CNodeInfo();
                            ni->deserialize(msg);
                            tlkRows.append(*ni);
                        }
                    }
                    tlkRows.sort(CNodeInfo::compare);
                    StringBuffer path;
                    getPartFilename(*tlkDesc, 0, path);
                    ActPrintLog("INDEXWRITE: creating toplevel key file : %s", path.str());
                    try
                    {
                        open(*tlkDesc, true, helper->queryDiskRecordSize()->isVariableSize());
                        if (tlkRows.length())
                        {
                            CNodeInfo &lastNode = tlkRows.item(tlkRows.length()-1);
                            memset(lastNode.value, 0xff, lastNode.size);
                        }
                        ForEachItemIn(idx, tlkRows)
                        {
                            CNodeInfo &info = tlkRows.item(idx);
                            builder->processKeyData((char *)info.value, info.pos, info.size);
                        }
                        close(*tlkDesc, tlkCrc, true);
                    }
                    catch (CATCHALL)
                    {
                        abortSoon = true;
                        close(*tlkDesc, tlkCrc, true);
                        removeFiles(*partDesc);
                        throw;
                    }
                }
            }
            else if (!isLocal && firstNode())