void sendResult(const void *row, IOutputRowSerializer *serializer) { CMessageBuffer mb; size32_t start = mb.length(); size32_t sz = 0; mb.append(sz); if (row&&hadElement) { CMemoryRowSerializer mbs(mb); serializer->serialize(mbs,(const byte *)row); sz = mb.length()-start-sizeof(size32_t); mb.writeDirect(start,sizeof(size32_t),&sz); } container.queryJob().queryJobComm().send(mb, 0, masterMpTag); }
const void *CPartialResultAggregator::getResult() { CMessageBuffer mb; if (activity.receiveMsg(mb, 0, activity.queryMpTag())) { if (mb.length()) { CThorStreamDeserializerSource ds(mb.length(), mb.readDirect(mb.length())); RtlDynamicRowBuilder rowBuilder(activity.queryRowAllocator()); size32_t sz = activity.queryRowDeserializer()->deserialize(rowBuilder,ds); return rowBuilder.finalizeRowClear(sz); } } return NULL; }
virtual void process() { CChooseSetsActivityMaster::process(); IHThorChooseSetsArg *helper = (IHThorChooseSetsArg *)queryHelper(); unsigned numSets = helper->getNumSets(); unsigned nslaves = container.queryJob().querySlaves(); MemoryBuffer countMb; rowcount_t *counts = (rowcount_t *)countMb.reserveTruncate((numSets*(nslaves+2)) * sizeof(rowcount_t)); rowcount_t *totals = counts + nslaves*numSets; rowcount_t *tallies = totals + numSets; memset(counts, 0, countMb.length()); unsigned s=nslaves; CMessageBuffer msg; while (s--) { msg.clear(); rank_t sender; if (!receiveMsg(msg, RANK_ALL, mpTag, &sender)) return; assertex(msg.length() == numSets*sizeof(rowcount_t)); unsigned set = (unsigned)sender - 1; memcpy(&counts[set*numSets], msg.toByteArray(), numSets*sizeof(rowcount_t)); } for (s=0; s<nslaves; s++) { unsigned i=0; for (; i<numSets; i++) totals[i] += counts[s * numSets + i]; } msg.clear(); msg.append(numSets*sizeof(rowcount_t), totals); unsigned endTotalsPos = msg.length(); for (s=0; s<nslaves; s++) { msg.rewrite(endTotalsPos); msg.append(numSets*sizeof(rowcount_t), tallies); container.queryJob().queryJobComm().send(msg, s+1, mpTag); unsigned i=0; for (; i<numSets; i++) tallies[i] += counts[s * numSets + i]; } }
/*
 * Hand the buffered result data to addResult() and empty the buffer.
 *
 * Does nothing when the buffer is empty, unless 'complete' is set, in which case
 * addResult() is still called (with whatever is buffered, possibly nothing).
 */
virtual void flushResults(bool complete=false)
{
    const bool nothingBuffered = (0 == resultData.length());
    if (nothingBuffered && !complete)
        return;

    ActPrintLog("flushing result");
    addResult(numResults, resultData, complete);
    resultData.clear();
    ActPrintLog("result flushed");
}
const void *getAggregate(CActivityBase &activity, unsigned partialResults, IRowInterfaces &rowIf, IHThorCompoundAggregateExtra &aggHelper, mptag_t mpTag) { // JCSMORE - pity this isn't common routine with similar one in aggregate, but helper is not common CThorRowArray slaveResults; slaveResults.ensure(partialResults); unsigned _partialResults = partialResults; while (_partialResults--) { CMessageBuffer mb; rank_t sender; if (!activity.receiveMsg(mb, RANK_ALL, mpTag, &sender)) return false; if (activity.queryAbortSoon()) return 0; if (mb.length()) { CThorStreamDeserializerSource ds(mb.length(), mb.readDirect(mb.length())); RtlDynamicRowBuilder rowBuilder(rowIf.queryRowAllocator()); size32_t sz = rowIf.queryRowDeserializer()->deserialize(rowBuilder, ds); slaveResults.setRow(sender-1, rowBuilder.finalizeRowClear(sz)); } } RtlDynamicRowBuilder result(rowIf.queryRowAllocator(), false); size32_t sz; bool first = true; _partialResults = 0; for (;_partialResults<partialResults; _partialResults++) { const void *partialResult = slaveResults.item(_partialResults); if (partialResult) { if (first) { first = false; sz = cloneRow(result, slaveResults.item(_partialResults), rowIf.queryRowMetaData()); } else sz = aggHelper.mergeAggregate(result, partialResult); } } if (first) sz = aggHelper.clearAggregate(result); return result.finalizeRowClear(sz); }
void process() { processed = 0; input = inputs.item(0); startInput(input); processed = THORDATALINK_STARTED; OwnedConstThorRow row = input->ungroupedNextRow(); CMessageBuffer mb; size32_t lenpos = mb.length(); // its 0 really mb.append((size32_t)0); if (row) { CMemoryRowSerializer msz(mb); ::queryRowSerializer(input)->serialize(msz,(const byte *)row.get()); size32_t sz = mb.length()-lenpos-sizeof(size32_t); mb.writeDirect(lenpos,sizeof(size32_t),&sz); processed++; } container.queryJob().queryJobComm().send(mb, 0, masterMpTag); }
/*
 * Send 'mb' to the peer and wait for its reply (returned in 'mb'), then check the
 * leading status byte of the reply.
 *
 * Returns false if the underlying sendRecv fails (e.g. within 'timeout'),
 * true if the reply starts with status 1.
 * Throws the remote error (code + message read from the reply) if the status is 0,
 * and a "protocol error" exception for an empty reply or any other status value
 * (an empty reply is reported with status 255).
 */
bool SortSlaveMP::sendRecv(CMessageBuffer &mb, unsigned timeout)
{
    if (!comm->sendRecv(mb,rank,tag,timeout))
        return false;

    byte status = 255;
    if (0 != mb.length())
    {
        mb.read(status);
        switch (status)
        {
        case 1:
            return true;
        case 0:
        {
            // failure reply: int error code followed by the message text
            int errCode;
            mb.read(errCode);
            StringAttr errText;
            mb.read(errText);
            throw MakeStringException(errCode, "%s", errText.get());
        }
        default:
            break;
        }
    }
    throw MakeStringException(-1,"SortSlaveMP::sendRecv() protocol error %d",(int)status);
    return false; // not reached
}
void process() { CWorkUnitWriteMasterBase::process(); unsigned nslaves = container.queryJob().querySlaves(); CMessageBuffer mb; unsigned s=0; for (; s<nslaves; s++) { loop { if (!container.queryJob().queryJobComm().send(mb, s+1, mpTag)) return; if (!receiveMsg(mb, s+1, mpTag)) return; if (0 == mb.length()) break; unsigned numGot; mb.read(numGot); unsigned l=mb.remaining(); if (workunitWriteLimit && totalSize+resultData.length()+l > workunitWriteLimit) { StringBuffer errMsg("Dataset too large to output to workunit (limit is set to "); errMsg.append(workunitWriteLimit/0x100000).append(") megabytes, in result ("); if (resultName.length()) errMsg.append("name=").append(resultName); else errMsg.append("sequence=").append(resultSeq); errMsg.append(")"); throw MakeThorException(TE_WorkUnitWriteLimitExceeded, "%s", errMsg.str()); } resultData.append(l, mb.readDirect(l)); mb.clear(); numResults += numGot; if (-1 != flushThreshold && resultData.length() >= (unsigned)flushThreshold) flushResults(); } } flushResults(true); }
virtual void process() { CMasterActivity::process(); IHThorDistributionArg * helper = (IHThorDistributionArg *)queryHelper(); IOutputMetaData *rcSz = helper->queryInternalRecordSize(); unsigned nslaves = container.queryJob().querySlaves(); IDistributionTable * * result = (IDistributionTable * *)createThorRow(rcSz->getMinRecordSize()); // not a real row helper->clearAggregate(result); while (nslaves--) { rank_t sender; CMessageBuffer msg; if (!receiveMsg(msg, RANK_ALL, mpTag, &sender)) return; #if THOR_TRACE_LEVEL >= 5 ActPrintLog("Received distribution result from node %d", (unsigned)sender); #endif if (msg.length()) helper->merge(result, msg); } StringBuffer tmp; tmp.append("<XML>"); helper->gatherResult(result, tmp); tmp.append("</XML>"); #if THOR_TRACE_LEVEL >= 5 ActPrintLog("Distribution result: %s", tmp.str()); #endif helper->sendResult(tmp.length(), tmp.str()); destroyThorRow(result); }
/*
 * Wait for one broadcast packet from any node on mpTag, hand its contents to 'mb'
 * and acknowledge it with an empty reply.
 *
 * 'receiving' is held on (via BooleanOnOff) for the duration of the call.
 * Returns false if 'aborted' is set once the reply has been sent; otherwise returns
 * whether 'mb' is non-empty on exit (which, if the receive itself failed, is
 * whatever the caller passed in).
 * Exceptions raised while swapping/replying are logged and rethrown.
 */
bool receive(MemoryBuffer &mb)
{
#ifdef _TRACEBROADCAST
    ActPrintLog(activity, "Broadcast node %d Receiving on tag %d",nodeindex,(int)mpTag);
#endif
    CMessageBuffer incoming;
    rank_t from;
    BooleanOnOff receivingScope(receiving);
    if (comm->recv(incoming, RANK_ALL, mpTag, &from))
    {
#ifdef _TRACEBROADCAST
        ActPrintLog(activity, "Broadcast node %d Received %d from %d",nodeindex, incoming.length(), from);
#endif
        try
        {
            // take the payload, then reuse the (now emptied) message as the acknowledgement
            mb.swapWith(incoming);
            incoming.clear(); // send empty reply
#ifdef _TRACEBROADCAST
            ActPrintLog(activity, "Broadcast node %d reply to %d",nodeindex, from);
#endif
            comm->reply(incoming);
            if (aborted)
                return false;
#ifdef _TRACEBROADCAST
            ActPrintLog(activity, "Broadcast node %d Received %d",nodeindex, mb.length());
#endif
        }
        catch (IException *e)
        {
            ActPrintLog(activity, e, "CBroadcaster::recv(2): exception");
            throw;
        }
    }
#ifdef _TRACEBROADCAST
    ActPrintLog(activity, "receive done");
#endif
    return (0 != mb.length());
}
/*
 * Handle one session request held in 'mb'.
 *
 * The message starts with an int function code; the remainder is decoded per case.
 * Every handled case clears 'mb', fills it with the response (possibly empty) and
 * sends it back with coven.reply(mb). Function codes not listed in the switch are
 * ignored and no reply is sent.
 *
 * Several cases test "mb.length()-mb.getPos() >= sizeof(x)" before reading a field:
 * that field is optional and is only read when enough bytes remain in the request.
 */
void processMessage(CMessageBuffer &mb)
{
    ICoven &coven=queryCoven();
    SessionId id;
    int fn;
    mb.read(fn);
    switch (fn) {
    case MSR_REGISTER_PROCESS_SESSION: {
            // Request: client node, server node, [int role].
            // Reply:   new session id followed by a serialized group.
            // NOTE(review): wait immediately followed by signal looks like a gate that
            // only lets requests through while acceptConnections is signalled - confirm.
            acceptConnections.wait();
            acceptConnections.signal();
            Owned<INode> node(deserializeINode(mb));
            Owned<INode> servernode(deserializeINode(mb));  // hopefully me, but not if forwarded
            int role=0;
            if (mb.length()-mb.getPos()>=sizeof(role)) { // a capability block present
                mb.read(role);
                if (!manager.authorizeConnection(role,false)) {
                    // Not authorized: send an empty reply, pause 100-1099ms, then drop the sender.
                    SocketEndpoint sender = mb.getSender();
                    mb.clear();
                    coven.reply(mb);
                    MilliSleep(100+getRandom()%1000); // Causes client to 'work' for a short time.
                    Owned<INode> node = createINode(sender); // shadows the outer 'node' on purpose of disconnecting the sender
                    coven.disconnect(node);
                    break;
                }
#ifdef _DEBUG
                StringBuffer eps;
                PROGLOG("Connection to %s authorized",mb.getSender().getUrlStr(eps).str());
#endif
            }
            IGroup *covengrp; // filled in by registerClientProcess; released below
            id = manager.registerClientProcess(node.get(),covengrp,(DaliClientRole)role);
            mb.clear().append(id);
            if (covengrp->rank(servernode)==RANK_NULL) { // must have been redirected
                covengrp->Release(); // no good, so just use one we know about (may use something more sophisticated later)
                INode *na = servernode.get();
                covengrp = createIGroup(1, &na);
            }
            covengrp->serialize(mb);
            covengrp->Release();
            coven.reply(mb);
        }
        break;
    case MSR_SECONDARY_REGISTER_PROCESS_SESSION: {
            // Request: session id, node, int role. Reply: empty.
            mb.read(id);
            Owned<INode> node (deserializeINode(mb));
            int role;
            mb.read(role);
            manager.addProcessSession(id,node.get(),(DaliClientRole)role);
            mb.clear();
            coven.reply(mb);
        }
        break;
    case MSR_REGISTER_SESSION: {
            // Request: security token, parent session id. Reply: new session id.
            SecurityToken tok;
            SessionId parentid;
            mb.read(tok).read(parentid);
            SessionId id = manager.registerSession(tok,parentid); // local 'id' shadows the function-level one
            mb.clear().append(id);
            coven.reply(mb);
        }
        break;
    case MSR_SECONDARY_REGISTER_SESSION: {
            // Request: session id. Reply: empty.
            mb.read(id);
            manager.addSession(id);
            mb.clear();
            coven.reply(mb);
        }
        break;
    case MSR_LOOKUP_PROCESS_SESSION: {
            // looks up from node or from id
            // Request: node, [session id]. If the node's endpoint is null and an id follows,
            // the reply is the serialized node for that id (or the null-endpoint node that
            // was sent, when the id is unknown); otherwise the reply is the session id for the node.
            Owned<INode> node (deserializeINode(mb));
            if (node->endpoint().isNull()&&(mb.length()-mb.getPos()>=sizeof(id))) {
                mb.read(id);
                INode *n = manager.getProcessSessionNode(id);
                if (n)
                    node.setown(n);
                node->serialize(mb.clear());
            }
            else {
                id = manager.lookupProcessSession(node.get());
                mb.clear().append(id);
            }
            coven.reply(mb);
        }
        break;
    case MSR_STOP_SESSION: {
            // Request: session id, bool failed. Reply: empty.
            SessionId sessid;
            bool failed;
            mb.read(sessid).read(failed);
            manager.stopSession(sessid,failed);
            mb.clear();
            coven.reply(mb);
        }
        break;
    case MSR_LOOKUP_LDAP_PERMISSIONS: {
            // Request: key, obj, serialized user descriptor, [unsigned auditflags].
            // Reply:   int permissions, followed by an int error code only when it is non-zero.
            StringAttr key;
            StringAttr obj;
            Owned<IUserDescriptor> udesc=createUserDescriptor();
            StringAttr username;    // not used in this case
            StringAttr passwordenc; // not used in this case
            mb.read(key).read(obj);
            udesc->deserialize(mb);
#ifndef _NO_DALIUSER_STACKTRACE
            //following debug code to be removed
            StringBuffer sb;
            udesc->getUserName(sb);
            if (0==sb.length())
            {
                DBGLOG("UNEXPECTED USER (NULL) in dasess.cpp CSessionRequestServer::processMessage() line %d", __LINE__);
            }
#endif
            unsigned auditflags = 0;
            if (mb.length()-mb.getPos()>=sizeof(auditflags))
                mb.read(auditflags);
            int err = 0;
            int ret=manager.getPermissionsLDAP(key,obj,udesc,auditflags,&err);
            mb.clear().append(ret);
            if (err)
                mb.append(err);
            coven.reply(mb);
        }
        break;
    }
}
/*
 * Send this command to 'node' and decode the reply into this object.
 *
 * node    - target of the request.
 * timeout - milliseconds to wait for the reply; 0 selects 12 hours.
 *
 * Reply decoding (each later section is optional and only read when enough bytes remain):
 *   - SCA_WORKUNIT_SERVICES_GET: the whole reply is swapped into wusbuf.
 *   - otherwise: [count + ids] [resultoverflow + count + results] [numdts + date/times].
 *
 * Returns true once a reply was received and decoded, false if sendRecv reported failure.
 * If an IException is raised, the connection to 'node' is dropped and the request is
 * retried; the exception is rethrown on the third failure, or immediately when the
 * action is SCA_STOP.
 */
bool send(INode *node,unsigned timeout)
{
    unsigned retries = 3;
    for (;;)
    {
        try
        {
            CMessageBuffer mb;
            serialize(mb);
            const bool replied = queryWorldCommunicator().sendRecv(mb,node,MPTAG_SASHA_REQUEST,timeout?timeout:12*60*60*1000); // could take a long time!
            if (!replied)
                break;

            clearIds();
            clearResults();
            if (action==SCA_WORKUNIT_SERVICES_GET)
            {
                mb.swapWith(wusbuf);
                return true;
            }

            // section 1: ids
            if (mb.length()-mb.getPos() < sizeof(unsigned))
                return true;
            unsigned count = 0;
            mb.read(count);
            for (unsigned idx = 0; idx < count; idx++)
            {
                StringAttr idText;
                mb.read(idText);
                addId(idText.get());
            }

            // section 2: overflow flag + results
            if (mb.length()-mb.getPos() < sizeof(unsigned)+sizeof(bool))
                return true;
            mb.read(resultoverflow);
            mb.read(count);
            for (unsigned idx = 0; idx < count; idx++)
            {
                StringAttr res;
                mb.read(res);
                size32_t reslen = res.length();
                results.append(*new StringAttrItem(res,reslen));
                resultsize += reslen;
            }

            // section 3: date/times
            if (mb.length()-mb.getPos() < sizeof(unsigned))
                return true;
            mb.read(numdts);
            free(dts);
            dts = NULL;
            if (numdts)
            {
                dts = (CDateTime *)calloc(numdts,sizeof(CDateTime));
                for (unsigned idx = 0; idx < numdts; idx++)
                    dts[idx].deserialize(mb);
            }
            return true;
        }
        catch (IException *e)
        {
            if ((--retries==0)||(action==SCA_STOP))
                throw;
            EXCLOG(e,"CSashaCommand send");
            ::Release(e);
        }
        try
        {
            // shouldn't really be necessary but make sure socket really closed
            queryWorldCommunicator().disconnect(node);
        }
        catch (IException *e)
        {
            EXCLOG(e,"CSashaCommand disconnect");
            ::Release(e);
        }
    }
    return false;
}
virtual void process() override { ActPrintLog("INDEXWRITE: Start"); init(); IRowStream *stream = inputStream; ThorDataLinkMetaInfo info; input->getMetaInfo(info); outRowAllocator.setown(getRowAllocator(helper->queryDiskRecordSize())); start(); if (refactor) { assertex(isLocal); if (active) { unsigned targetWidth = partDesc->queryOwner().numParts()-(buildTlk?1:0); assertex(0 == container.queryJob().querySlaves() % targetWidth); unsigned partsPerNode = container.queryJob().querySlaves() / targetWidth; unsigned myPart = queryJobChannel().queryMyRank(); IArrayOf<IRowStream> streams; streams.append(*LINK(stream)); --partsPerNode; // Should this be merging 1,11,21,31 etc. unsigned p=0; unsigned fromPart = targetWidth+1 + (partsPerNode * (myPart-1)); for (; p<partsPerNode; p++) { streams.append(*createRowStreamFromNode(*this, fromPart++, queryJobChannel().queryJobComm(), mpTag, abortSoon)); } ICompare *icompare = helper->queryCompare(); assertex(icompare); Owned<IRowLinkCounter> linkCounter = new CThorRowLinkCounter; myInputStream.setown(createRowStreamMerger(streams.ordinality(), streams.getArray(), icompare, false, linkCounter)); stream = myInputStream; } else // serve nodes, creating merged parts rowServer.setown(createRowServer(this, stream, queryJobChannel().queryJobComm(), mpTag)); } processed = THORDATALINK_STARTED; // single part key support // has to serially pull all data fron nodes 2-N // nodes 2-N, could/should start pushing some data (as it's supposed to be small) to cut down on serial nature. 
unsigned node = queryJobChannel().queryMyRank(); if (singlePartKey) { if (1 == node) { try { open(*partDesc, false, helper->queryDiskRecordSize()->isVariableSize()); loop { OwnedConstThorRow row = inputStream->ungroupedNextRow(); if (!row) break; if (abortSoon) return; processRow(row); } unsigned node = 2; while (node <= container.queryJob().querySlaves()) { Linked<IOutputRowDeserializer> deserializer = ::queryRowDeserializer(input); CMessageBuffer mb; Owned<ISerialStream> stream = createMemoryBufferSerialStream(mb); CThorStreamDeserializerSource rowSource; rowSource.setStream(stream); bool successSR; loop { { BooleanOnOff tf(receivingTag2); successSR = queryJobChannel().queryJobComm().sendRecv(mb, node, mpTag2); } if (successSR) { if (rowSource.eos()) break; Linked<IEngineRowAllocator> allocator = ::queryRowAllocator(input); do { RtlDynamicRowBuilder rowBuilder(allocator); size32_t sz = deserializer->deserialize(rowBuilder, rowSource); OwnedConstThorRow fRow = rowBuilder.finalizeRowClear(sz); processRow(fRow); } while (!rowSource.eos()); } } node++; } } catch (CATCHALL) { close(*partDesc, partCrc, true); throw; } close(*partDesc, partCrc, true); doStopInput(); } else { CMessageBuffer mb; CMemoryRowSerializer mbs(mb); Linked<IOutputRowSerializer> serializer = ::queryRowSerializer(input); loop { BooleanOnOff tf(receivingTag2); if (queryJobChannel().queryJobComm().recv(mb, 1, mpTag2)) // node 1 asking for more.. { if (abortSoon) break; mb.clear(); do { OwnedConstThorRow row = inputStream->ungroupedNextRow(); if (!row) break; serializer->serialize(mbs, (const byte *)row.get()); } while (mb.length() < SINGLEPART_KEY_TRANSFER_SIZE); // NB: at least one row if (!queryJobChannel().queryJobComm().reply(mb)) throw MakeThorException(0, "Failed to send index data to node 1, from node %d", node); if (0 == mb.length()) break; } } } }
/*
 * Receive one request from rank 0 on 'tag', invoke the matching ISortSlaveMP method
 * on 'slave' and send the reply.
 *
 * Request:  a function-code byte followed by that function's arguments.
 * Reply:    byte 1 followed by the function's results, or - if an IException was
 *           caught before any reply had gone out - byte 0, int error code, error text.
 *
 * Cases that set 'replydone' send the (results-free) success reply BEFORE doing the
 * work, so the requester is not kept waiting; an exception raised in such a case can
 * no longer be reported in the reply, but is still rethrown from here.
 *
 * Returns false when an empty message is received or after FN_Disconnect, true otherwise.
 * Any caught IException (including "unknown function") is rethrown after replying.
 */
bool SortSlaveMP::marshall(ISortSlaveMP &slave, ICommunicator* comm, mptag_t tag)
{
    CMessageBuffer mb;
    rank_t sender;
    comm->recv(mb,0,tag,&sender);       // NB only recv from master
    if (mb.length()==0) {
        // an empty message is the stop signal
        PROGLOG("Stopping SortSlaveMP::marshall");
        return false;
    }
    byte fn;
    mb.read(fn);
    CMessageBuffer mbout;
    mbout.init(mb.getSender(),tag,mb.getReplyTag());
    byte okout=1; // success marker; replaced by 0 in the catch handler below
    mbout.append(okout);
#ifdef FULLTRACE
    StringBuffer tmp1;
    PROGLOG(">SortSlaveMP::marshall(%d) got %d from %s tag %d replytag %d",(int)fn, mb.length(), mb.getSender().getUrlStr(tmp1).str(),tag,mb.getReplyTag());
#endif
    bool replydone = false; // set by cases that reply before doing their work
    Owned<IException> err;  // exception to rethrow once the reply has been dealt with
    try {
        switch ((MPSlaveFunctions)(int)fn) {
            case FN_Connect: {
                unsigned _part;
                unsigned _numnodes;
                mb.read(_part).read(_numnodes);
                bool ret = slave.Connect(_part,_numnodes);
                mbout.append(ret);
            }
            break;
            case FN_StartGather: {
                slave.StartGather();
            }
            break;
            case FN_GetGatherInfo: {
                bool hasserializer;
                mb.read(hasserializer);
                rowcount_t numlocal;
                unsigned overflowscale;
                offset_t totalsize;
                slave.GetGatherInfo(numlocal,totalsize,overflowscale,hasserializer);
                mbout.append(numlocal).append(totalsize).append(overflowscale);
            }
            break;
            case FN_GetMinMax: {
                size32_t keybuffsize;
                void *keybuff; // filled in by GetMinMax; freed here after serializing
                size32_t avrecsize;
                rowcount_t ret = slave.GetMinMax(keybuffsize,keybuff,avrecsize);
                serializeblk(mbout,keybuffsize,keybuff).append(avrecsize).append(ret);
                free(keybuff);
            }
            break;
            case FN_GetMultiMidPointStart: {
                // reply first, then do the work
                replydone = true;
                comm->reply(mbout);
                size32_t lkeybuffsize;
                void * lkeybuff;
                size32_t hkeybuffsize;
                void * hkeybuff;
                deserializeblk(mb,lkeybuffsize,lkeybuff);
                deserializeblk(mb,hkeybuffsize,hkeybuff);
                slave.GetMultiMidPointStart(lkeybuffsize,lkeybuff,hkeybuffsize,hkeybuff);
                free(lkeybuff);
                free(hkeybuff);
            }
            break;
            case FN_MultiBinChopStop: {
                unsigned num;
                mb.read(num);
                // results are written straight into space reserved in the reply
                void *out = mbout.reserveTruncate(num*sizeof(rowcount_t));
                slave.MultiBinChopStop(num,(rowcount_t *)out);
            }
            break;
            case FN_GetMultiMidPointStop: {
                size32_t mkeybuffsize=0;
                void * mkeybuff = NULL;
                slave.GetMultiMidPointStop(mkeybuffsize,mkeybuff);
                serializeblk(mbout,mkeybuffsize,mkeybuff);
                free(mkeybuff);
            }
            break;
            case FN_MultiBinChopStart: {
                // reply first, then do the work
                replydone = true;
                comm->reply(mbout);
                size32_t keybuffsize;
                void * keybuff;
                deserializeblk(mb,keybuffsize,keybuff);
                byte cmpfn;
                mb.read(cmpfn);
                slave.MultiBinChopStart(keybuffsize,(const byte *)keybuff,cmpfn);
                free(keybuff);
            }
            break;
            case FN_MultiBinChop: {
                size32_t keybuffsize;
                void * keybuff;
                deserializeblk(mb,keybuffsize,keybuff);
                unsigned num;
                byte cmpfn;
                mb.read(num).read(cmpfn);
                // results are written straight into space reserved in the reply
                void *out = mbout.reserveTruncate(num*sizeof(rowcount_t));
                slave.MultiBinChop(keybuffsize,(const byte *)keybuff,num,(rowcount_t *)out,cmpfn);
                free(keybuff);
            }
            break;
            case FN_OverflowAdjustMapStart: {
                // reply first, then do the work
                replydone = true;
                comm->reply(mbout);
                unsigned mapsize;
                mb.read(mapsize);
                const void * map = mb.readDirect(mapsize*sizeof(rowcount_t)); // points into the request buffer
                size32_t keybuffsize;
                void * keybuff;
                deserializeblk(mb,keybuffsize,keybuff);
                byte cmpfn;
                mb.read(cmpfn);
                bool useaux;
                mb.read(useaux);
                slave.OverflowAdjustMapStart(mapsize,(rowcount_t *)map,keybuffsize,(const byte *)keybuff,cmpfn,useaux);
                free(keybuff);
            }
            break;
            case FN_OverflowAdjustMapStop: {
                unsigned mapsize;
                mb.read(mapsize);
                rowcount_t ret=0;
                size32_t retofs = mbout.length();
                mbout.append(ret); // placeholder for the return value, patched below
                void *map=mbout.reserveTruncate(mapsize*sizeof(rowcount_t));
                ret = slave.OverflowAdjustMapStop(mapsize,(rowcount_t *)map);     // could avoid copy here if passed mb
                mbout.writeDirect(retofs,sizeof(ret),&ret);
            }
            break;
            case FN_MultiMerge: {
                // reply first, then do the work
                replydone = true;
                comm->reply(mbout);
                unsigned mapsize;
                mb.read(mapsize);
                const void *map = mb.readDirect(mapsize*sizeof(rowcount_t)); // points into the request buffer
                unsigned num;
                mb.read(num);
                SocketEndpointArray epa;
                for (unsigned i=0;i<num;i++) {
                    SocketEndpoint ep;
                    ep.deserialize(mb);
                    epa.append(ep);
                }
                slave.MultiMerge(mapsize,(rowcount_t *)map,num,epa.getArray());
            }
            break;
            case FN_MultiMergeBetween: {
                // reply first, then do the work
                replydone = true;
                comm->reply(mbout);
                unsigned mapsize;
                mb.read(mapsize);
                // two consecutive maps of 'mapsize' entries each, both pointing into the request buffer
                const void *map = mb.readDirect(mapsize*sizeof(rowcount_t));
                const void *mapupper = mb.readDirect(mapsize*sizeof(rowcount_t));
                unsigned num;
                mb.read(num);
                SocketEndpointArray epa;
                for (unsigned i=0;i<num;i++) {
                    SocketEndpoint ep;
                    ep.deserialize(mb);
                    epa.append(ep);
                }
                slave.MultiMergeBetween(mapsize,(rowcount_t *)map,(rowcount_t *)mapupper,num,epa.getArray());
            }
            break;
            case FN_SingleMerge: {
                replydone = true;
                comm->reply(mbout);         // async
                slave.SingleMerge();
            }
            break;
            case FN_FirstRowOfFile: {
                StringAttr filename;
                mb.read(filename);
                size32_t rowbufsize = 0;
                byte *rowbuf = NULL;
                bool ret = slave.FirstRowOfFile(filename,rowbufsize,rowbuf);
                serializeblk(mbout,rowbufsize,rowbuf);
                free(rowbuf);
                mbout.append(ret);
            }
            break;
            case FN_GetMultiNthRow: {
                unsigned numsplits;
                mb.read(numsplits);
                size32_t mkeybuffsize = 0;
                void * mkeybuf = NULL;
                slave.GetMultiNthRow(numsplits,mkeybuffsize,mkeybuf);
                serializeblk(mbout,mkeybuffsize,mkeybuf);
                free(mkeybuf);
            }
            break;
            case FN_StartMiniSort: {
                replydone = true;
                rowcount_t totalrows;
                mb.read(totalrows);
                comm->reply(mbout);         // async
                slave.StartMiniSort(totalrows);
            }
            break;
            case FN_Close: {
                replydone = true;
                comm->reply(mbout);         // async
                slave.Close();
            }
            break;
            case FN_CloseWait: {
                slave.CloseWait();
            }
            break;
            case FN_Disconnect: {
                comm->reply(mbout);         // async
                replydone = true;
                slave.Disconnect();
            }
            // no break: FN_Disconnect leaves the function here, returning false like the stop case above
            return false;
            default:
                throw MakeStringException(-1,"unknown function %d",(int)fn);
        }
    }
    catch (IException *e) {
        EXCLOG(e,"SortSlaveMP::marshall");
        if (!replydone) {
            // replace the success reply with: 0, error code, error message
            mbout.clear();
            okout = 0;
            mbout.append(okout);
            int err = e->errorCode(); // local int 'err' shadows the Owned<IException> 'err' inside this block
            mbout.append(err);
            StringBuffer outs;
            e->errorMessage(outs);
            mbout.append(outs.str());
        }
        err.setown(e); // keep the exception so it can be rethrown after replying
    }
    if (!replydone) {
#ifdef FULLTRACE
        StringBuffer tmp1;
        PROGLOG("<SortSlaveMP::marshall(%d) send %d to %s tag %d",(int)fn, mbout.length(), mbout.getSender().getUrlStr(tmp1).str(),mbout.getReplyTag());
#endif
        comm->reply(mbout);
    }
    if (err.get())
        throw err.getClear();
    return true;
}
void doBroadcast() { try { unsigned i = 0; unsigned n; if (1 == nodeindex) n = broadcastSlave-1; else if (broadcastSlave==nodeindex) n = 0; else n = nodeindex-1; loop { unsigned t = target(i++,n); if (t>numnodes) break; if (t != broadcastSlave) { #ifdef _TRACEBROADCAST ActPrintLog(activity, "Broadcast node %d Sending to node %d size %d",nodeindex,t,broadcasting.length()); #endif mptag_t rt = createReplyTag(); broadcasting.setReplyTag(rt); // simulate sendRecv comm->send(broadcasting, t, mpTag); CMessageBuffer rMsg; comm->recv(rMsg, t, rt); #ifdef _TRACEBROADCAST ActPrintLog(activity, "Broadcast node %d Sent to node %d size %d received back %d",nodeindex,t,broadcasting.length(),rMsg.length()); #endif } } } catch (IException *e) { ActPrintLog(activity, e, "CBroadcaster::broadcast exception"); throw; } #ifdef _TRACEBROADCAST ActPrintLog(activity, "do broadcast done done"); #endif }
virtual void process() override { ActPrintLog("INDEXWRITE: Start"); init(); IRowStream *stream = inputStream; ThorDataLinkMetaInfo info; input->getMetaInfo(info); outRowAllocator.setown(getRowAllocator(helper->queryDiskRecordSize())); start(); if (refactor) { assertex(isLocal); if (active) { unsigned targetWidth = partDesc->queryOwner().numParts()-(buildTlk?1:0); assertex(0 == container.queryJob().querySlaves() % targetWidth); unsigned partsPerNode = container.queryJob().querySlaves() / targetWidth; unsigned myPart = queryJobChannel().queryMyRank(); IArrayOf<IRowStream> streams; streams.append(*LINK(stream)); --partsPerNode; // Should this be merging 1,11,21,31 etc. unsigned p=0; unsigned fromPart = targetWidth+1 + (partsPerNode * (myPart-1)); for (; p<partsPerNode; p++) { streams.append(*createRowStreamFromNode(*this, fromPart++, queryJobChannel().queryJobComm(), mpTag, abortSoon)); } ICompare *icompare = helper->queryCompare(); assertex(icompare); Owned<IRowLinkCounter> linkCounter = new CThorRowLinkCounter; myInputStream.setown(createRowStreamMerger(streams.ordinality(), streams.getArray(), icompare, false, linkCounter)); stream = myInputStream; } else // serve nodes, creating merged parts rowServer.setown(createRowServer(this, stream, queryJobChannel().queryJobComm(), mpTag)); } processed = THORDATALINK_STARTED; // single part key support // has to serially pull all data fron nodes 2-N // nodes 2-N, could/should start pushing some data (as it's supposed to be small) to cut down on serial nature. 
unsigned node = queryJobChannel().queryMyRank(); if (singlePartKey) { if (1 == node) { try { open(*partDesc, false, helper->queryDiskRecordSize()->isVariableSize()); for (;;) { OwnedConstThorRow row = inputStream->ungroupedNextRow(); if (!row) break; if (abortSoon) return; processRow(row); } unsigned node = 2; while (node <= container.queryJob().querySlaves()) { Linked<IOutputRowDeserializer> deserializer = ::queryRowDeserializer(input); CMessageBuffer mb; Owned<ISerialStream> stream = createMemoryBufferSerialStream(mb); CThorStreamDeserializerSource rowSource; rowSource.setStream(stream); bool successSR; for (;;) { { BooleanOnOff tf(receivingTag2); successSR = queryJobChannel().queryJobComm().sendRecv(mb, node, mpTag2); } if (successSR) { if (rowSource.eos()) break; Linked<IEngineRowAllocator> allocator = ::queryRowAllocator(input); do { RtlDynamicRowBuilder rowBuilder(allocator); size32_t sz = deserializer->deserialize(rowBuilder, rowSource); OwnedConstThorRow fRow = rowBuilder.finalizeRowClear(sz); processRow(fRow); } while (!rowSource.eos()); } } node++; } } catch (CATCHALL) { close(*partDesc, partCrc, true); throw; } close(*partDesc, partCrc, true); stop(); } else { CMessageBuffer mb; CMemoryRowSerializer mbs(mb); Linked<IOutputRowSerializer> serializer = ::queryRowSerializer(input); for (;;) { BooleanOnOff tf(receivingTag2); if (queryJobChannel().queryJobComm().recv(mb, 1, mpTag2)) // node 1 asking for more.. 
{ if (abortSoon) break; mb.clear(); do { OwnedConstThorRow row = inputStream->ungroupedNextRow(); if (!row) break; serializer->serialize(mbs, (const byte *)row.get()); } while (mb.length() < SINGLEPART_KEY_TRANSFER_SIZE); // NB: at least one row if (!queryJobChannel().queryJobComm().reply(mb)) throw MakeThorException(0, "Failed to send index data to node 1, from node %d", node); if (0 == mb.length()) break; } } } } else { if (!refactor || active) { try { StringBuffer partFname; getPartFilename(*partDesc, 0, partFname); ActPrintLog("INDEXWRITE: process: handling fname : %s", partFname.str()); open(*partDesc, false, helper->queryDiskRecordSize()->isVariableSize()); ActPrintLog("INDEXWRITE: write"); BooleanOnOff tf(receiving); if (!refactor || !active) receiving = false; do { OwnedConstThorRow row = inputStream->ungroupedNextRow(); if (!row) break; processRow(row); } while (!abortSoon); ActPrintLog("INDEXWRITE: write level 0 complete"); } catch (CATCHALL) { close(*partDesc, partCrc, isLocal && !buildTlk && 1 == node); throw; } close(*partDesc, partCrc, isLocal && !buildTlk && 1 == node); stop(); ActPrintLog("INDEXWRITE: Wrote %" RCPF "d records", processed & THORDATALINK_COUNT_MASK); if (buildTlk) { ActPrintLog("INDEXWRITE: sending rows"); NodeInfoArray tlkRows; CMessageBuffer msg; if (firstNode()) { if (processed & THORDATALINK_COUNT_MASK) { if (enableTlkPart0) tlkRows.append(* new CNodeInfo(0, firstRow.get(), firstRowSize, totalCount)); tlkRows.append(* new CNodeInfo(1, lastRow.get(), lastRowSize, totalCount)); } } else { if (processed & THORDATALINK_COUNT_MASK) { CNodeInfo row(queryJobChannel().queryMyRank(), lastRow.get(), lastRowSize, totalCount); row.serialize(msg); } queryJobChannel().queryJobComm().send(msg, 1, mpTag); } if (firstNode()) { ActPrintLog("INDEXWRITE: Waiting on tlk to complete"); // JCSMORE if refactor==true, is rowsToReceive here right?? unsigned rowsToReceive = (refactor ? 
(tlkDesc->queryOwner().numParts()-1) : container.queryJob().querySlaves()) -1; // -1 'cos got my own in array already ActPrintLog("INDEXWRITE: will wait for info from %d slaves before writing TLK", rowsToReceive); while (rowsToReceive--) { msg.clear(); receiveMsg(msg, RANK_ALL, mpTag); // NH->JCS RANK_ALL_OTHER not supported for recv if (abortSoon) return; if (msg.length()) { CNodeInfo *ni = new CNodeInfo(); ni->deserialize(msg); tlkRows.append(*ni); } } tlkRows.sort(CNodeInfo::compare); StringBuffer path; getPartFilename(*tlkDesc, 0, path); ActPrintLog("INDEXWRITE: creating toplevel key file : %s", path.str()); try { open(*tlkDesc, true, helper->queryDiskRecordSize()->isVariableSize()); if (tlkRows.length()) { CNodeInfo &lastNode = tlkRows.item(tlkRows.length()-1); memset(lastNode.value, 0xff, lastNode.size); } ForEachItemIn(idx, tlkRows) { CNodeInfo &info = tlkRows.item(idx); builder->processKeyData((char *)info.value, info.pos, info.size); } close(*tlkDesc, tlkCrc, true); } catch (CATCHALL) { abortSoon = true; close(*tlkDesc, tlkCrc, true); removeFiles(*partDesc); throw; } } } else if (!isLocal && firstNode())