void sendRemainingHeaderLines()
{
    if (!headerLines)
        return;
    unsigned which = sentHeaderLines->scanInvert(0, false);
    if (which < subFiles)
    {
        CMessageBuffer msgMb;
        bool someLeft=false;
        do
        {
            msgMb.append(which);
            unsigned &remaining = getHeaderLines(which);
            if (0 != remaining)
                someLeft = true;
            msgMb.append(remaining);
            which = sentHeaderLines->scanInvert(which+1, false);
        }
        while (which < subFiles);
        if (someLeft)
            queryJobChannel().queryJobComm().send(msgMb, queryJobChannel().queryMyRank()+1, mpTag);
        else
            sendAllDone();
    }
}
virtual void validateFile(IDistributedFile *file)
{
    IHThorDiskReadBaseArg *helper = (IHThorDiskReadBaseArg *)queryHelper();
    bool codeGenGrouped = 0 != (TDXgrouped & helper->getFlags());
    bool isGrouped = fileDesc->isGrouped();
    if (isGrouped != codeGenGrouped)
    {
        Owned<IException> e = MakeActivityWarning(&container, TE_GroupMismatch, "DFS and code-generated group info differs: DFS(%s), CodeGen(%s), using DFS info", isGrouped?"grouped":"ungrouped", codeGenGrouped?"grouped":"ungrouped");
        queryJobChannel().fireException(e);
    }
    IOutputMetaData *recordSize = helper->queryDiskRecordSize()->querySerializedDiskMeta();
    if (recordSize->isFixedSize()) // fixed size
    {
        if (0 != fileDesc->queryProperties().getPropInt("@recordSize"))
        {
            size32_t rSz = fileDesc->queryProperties().getPropInt("@recordSize");
            if (isGrouped)
                rSz--; // eog byte not to be included in this test.
            if (rSz != recordSize->getMinRecordSize())
                throw MakeThorException(TE_RecordSizeMismatch, "Published record size %d for file %s, does not match coded record size %d", rSz, fileName.get(), recordSize->getMinRecordSize());
        }
        if (!fileDesc->isCompressed() && (TDXcompress & helper->getFlags()))
        {
            size32_t rSz = recordSize->getMinRecordSize();
            if (isGrouped)
                rSz++;
            if (rSz >= MIN_ROWCOMPRESS_RECSIZE)
            {
                Owned<IException> e = MakeActivityWarning(&container, TE_CompressionMismatch, "Ignoring compression attribute on file '%s', which is not published as compressed in DFS", fileName.get());
                queryJobChannel().fireException(e);
            }
        }
    }
    if (0 == (TDRnocrccheck & helper->getFlags()))
        checkFormatCrc(this, file, helper->getFormatCrc(), false);
}
virtual void init(MemoryBuffer &data, MemoryBuffer &slaveData) override
{
    if (global)
    {
        mptag_t barrierTag = queryJobChannel().deserializeMPTag(data);
        barrier.setown(queryJobChannel().createBarrier(barrierTag));
    }
}
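// Chained-slave pattern: getFirst() blocks until the previous rank sends its running
// count, optionally followed by one serialized boundary row (absent when it had none).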
const void *getFirst() // for global, not called on 1st slave
{
    CMessageBuffer msg;
    if (!queryJobChannel().queryJobComm().recv(msg, queryJobChannel().queryMyRank()-1, mpTag)) // previous node
        return NULL;
    msg.read(count);
    size32_t r = msg.remaining();
    OwnedConstThorRow firstRow;
    if (r)
        firstRow.deserialize(inrowif, r, msg.readDirect(r));
    return firstRow.getClear();
}
void sendTallies() // NB: not called on last node.
{
#if THOR_TRACE_LEVEL >= 5
    StringBuffer s;
    unsigned idx=0;
    for (; idx<numSets; idx++)
        s.append("[").append(tallies[idx]).append("]");
    ActPrintLog("CHOOSESETS: Outgoing count = %s", s.str());
#endif
    CMessageBuffer msg;
    msg.append(numSets * sizeof(unsigned), tallies);
    queryJobChannel().queryJobComm().send(msg, queryJobChannel().queryMyRank()+1, mpTag);
}
virtual void start() override
{
    ActivityTimer s(totalCycles, timeActivities);
    ActPrintLog(rolloverEnabled ? "GROUP: is global" : "GROUP: is local");
    PARENT::start();
    eogNext = prevEog = eof = false;
    if (rolloverEnabled)
    {
        useRollover = !lastNode();
#ifdef _TESTING
        ActPrintLog("Node number = %d, Total Nodes = %d", queryJobChannel().queryMyRank(), container.queryJob().querySlaves());
#endif
    }
    stream.set(inputStream);
    startLastGroup = getDataLinkGlobalCount();
    next.setown(getNext());
    if (rolloverEnabled && !firstNode()) // 1st node can have nothing to send
    {
        Owned<IThorRowCollector> collector = createThorRowCollector(*this, this, NULL, stableSort_none, rc_mixed, SPILL_PRIORITY_SPILLABLE_STREAM);
        Owned<IRowWriter> writer = collector->getWriter();
        if (next)
        {
            ActPrintLog("GROUP: Sending first group to previous node(%d)", queryJobChannel().queryMyRank()-1);
            for (;;)
            {
                writer->putRow(next.getLink());
                if (abortSoon)
                    break; // always send group even when aborting
                OwnedConstThorRow next2 = getNext();
                if (!next2)
                {
                    eof = true;
                    break;
                }
                else if (!helper->isSameGroup(next2, next))
                {
                    next.setown(next2.getClear());
                    break;
                }
                next.setown(next2.getClear());
            }
        }
        writer.clear();
        ActPrintLog("GROUP: %" RCPF "d records to send", collector->numRows());
        Owned<IRowStream> strm = collector->getStream();
        rowServer.setown(createRowServer(this, strm, queryJobChannel().queryJobComm(), mpTag));
    }
}
void CDiskWriteSlaveActivityBase::close()
{
    try
    {
        if (out)
        {
            uncompressedBytesWritten = out->getPosition();
            if (calcFileCrc)
            {
                if (diskHelperBase->getFlags() & TDWextend)
                {
                    assertex(!"TBD need to merge CRC");
                }
                else
                    out->flush(&fileCRC);
            }
            else if (!abortSoon)
                out->flush();
            out.clear();
        }
        else if (outraw)
        {
            outraw->flush();
            uncompressedBytesWritten = outraw->tell();
            outraw.clear();
        }
        {
            CriticalBlock block(statsCs);
            mergeStats(fileStats, outputIO);
            outputIO.clear();
        }
        if (!rfsQueryParallel && dlfn.isExternal() && !lastNode())
        {
            rowcount_t rows = processed & THORDATALINK_COUNT_MASK;
            ActPrintLog("External write done, signalling next (row count = %" RCPF "d)", rows);
            CMessageBuffer msg;
            msg.append(rows);
            msg.append(tempExternalName);
            queryJobChannel().queryJobComm().send(msg, queryJobChannel().queryMyRank()+1, mpTag);
        }
    }
    catch (IException *e)
    {
        ActPrintLogEx(&queryContainer(), e, thorlog_null, MCwarning, "Error closing file: %s", fName.get());
        abortSoon = true;
        removeFiles();
        throw e;
    }
    if (abortSoon)
        removeFiles();
}
void sendHeaderLines(unsigned subFile, unsigned part)
{
    if (0 == headerLinesRemaining[subFile])
    {
        if (sentHeaderLines->testSet(subFile))
            return;
        unsigned which = gotHeaderLines->scan(0, false);
        if (which == subFiles) // all received
        {
            bool someLeft=false;
            unsigned hL=0;
            for (; hL<subFiles; hL++)
            {
                if (headerLinesRemaining[hL])
                {
                    someLeft = true;
                    break;
                }
            }
            if (!someLeft)
            {
                sendAllDone();
                return;
            }
        }
    }
    else
    {
        if (localLastPart[subFile] != part) // only ready to send if last local part
            return;
        if (sentHeaderLines->testSet(subFile))
            return;
    }
    CMessageBuffer msgMb;
    msgMb.append(subFile);
    msgMb.append(headerLinesRemaining[subFile]);
    // inform next slave about all subfiles I'm not dealing with.
    for (unsigned s=0; s<subFiles; s++)
    {
        if (NotFound == localLastPart[s])
        {
            sentHeaderLines->testSet(s);
            msgMb.append(s);
            msgMb.append(headerLinesRemaining[s]);
        }
    }
    queryJobChannel().queryJobComm().send(msgMb, queryJobChannel().queryMyRank()+1, mpTag);
}
void process()
{
    CMessageBuffer msg;
    unsigned inputs = container.getInputs();
    unsigned slaves = container.queryJob().querySlaves();
    unsigned s;
    bool readSome=false, slaveReadSome;
    IntArray replyTags;
    for (s=0; s<slaves; s++)
        replyTags.append(0);
    while (inputs>1)
    {
        inputs--;
        for (s=0; s<slaves; s++)
        {
            rank_t sender;
            if (!receiveMsg(msg, RANK_ALL, replyTag, &sender))
                return;
            replyTags.replace(msg.getReplyTag(), ((int)sender)-1);
            msg.read(slaveReadSome);
            if (slaveReadSome)
                readSome = true;
        }
        msg.clear().append(readSome);
        for (s=0; s<slaves; s++)
        {
            if (!queryJobChannel().queryJobComm().send(msg, ((rank_t)s+1), (mptag_t)replyTags.item(s), LONGTIMEOUT))
                throw MakeActivityException(this, 0, "Failed to give result to slave");
        }
        if (readSome) // got some, have told slaves to ignore rest, so finish
            break;
    }
}
virtual void init()
{
    CMasterActivity::init();
    OwnedRoxieString fname(helper->getFileName());
    Owned<IDistributedFile> fetchFile = queryThorFileManager().lookup(container.queryJob(), fname, false, 0 != (helper->getFetchFlags() & FFdatafileoptional), true);
    if (fetchFile)
    {
        if (isFileKey(fetchFile))
            throw MakeActivityException(this, 0, "Attempting to read index as a flat file: %s", fname.get());
        Owned<IFileDescriptor> fileDesc = getConfiguredFileDescriptor(*fetchFile);
        void *ekey;
        size32_t ekeylen;
        helper->getFileEncryptKey(ekeylen, ekey);
        bool encrypted = fileDesc->queryProperties().getPropBool("@encrypted");
        if (0 != ekeylen)
        {
            memset(ekey, 0, ekeylen);
            free(ekey);
            if (!encrypted)
            {
                Owned<IException> e = MakeActivityWarning(&container, TE_EncryptionMismatch, "Ignoring encryption key provided as file '%s' was not published as encrypted", fetchFile->queryLogicalName());
                queryJobChannel().fireException(e);
            }
        }
        else if (encrypted)
            throw MakeActivityException(this, 0, "File '%s' was published as encrypted but no encryption key provided", fetchFile->queryLogicalName());
        mapping.setown(getFileSlaveMaps(fetchFile->queryLogicalName(), *fileDesc, container.queryJob().queryUserDescriptor(), container.queryJob().querySlaveGroup(), container.queryLocalOrGrouped(), false, NULL, fetchFile->querySuperFile()));
        mapping->serializeFileOffsetMap(offsetMapMb);
        addReadFile(fetchFile);
    }
}
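// Presumably the counterpart of getFirst() above: forward the accumulated count,
// plus the boundary row if there is one, to the next rank; the last node ends the chain.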
void putNext(const void *prev)
{
    if (nextPut)
        return;
    nextPut = true;
    if (global && !lastNode())
    {
        CMessageBuffer msg;
        msg.append(count);
        if (prev)
        {
            CMemoryRowSerializer msz(msg);
            ::queryRowSerializer(input)->serialize(msz, (const byte *)prev);
        }
        if (!queryJobChannel().queryJobComm().send(msg, queryJobChannel().queryMyRank()+1, mpTag)) // to next
            return;
    }
}
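// When the local stream is exhausted and rollover is enabled, splice in the next
// node's first group via the row stream that node serves (see start() above).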
const void *getNext()
{
    const void *row = stream->ungroupedNextRow();
    if (row)
        return row;
    else if (useRollover)
    {
        useRollover = false;
        // JCSMORE will generate time out log messages, while waiting for next nodes group
        rank_t myNode = queryJobChannel().queryMyRank();
        nextNodeStream.setown(createRowStreamFromNode(*this, myNode+1, queryJobChannel().queryJobComm(), mpTag, abortSoon));
        stream.set(nextNodeStream);
        return stream->nextRow();
    }
    else
        return NULL;
}
virtual void onInputFinished(rowcount_t count) override
{
    if (container.queryLocalOrGrouped())
        return;
    CMessageBuffer msg;
    msg.append(numSets*sizeof(rowcount_t), counts);
    queryJobChannel().queryJobComm().send(msg, 0, mpTag);
}
void sendResult(rowcount_t r)
{
    if (resultSent)
        return;
    resultSent = true;
    CMessageBuffer mb;
    mb.append(r);
    queryJobChannel().queryJobComm().send(mb, 0, mpTag);
}
void sendResult(const void *row, IOutputRowSerializer *serializer, rank_t dst)
{
    CMessageBuffer mb;
    DelayedSizeMarker sizeMark(mb);
    if (row && hadElement)
    {
        CMemoryRowSerializer mbs(mb);
        serializer->serialize(mbs, (const byte *)row);
        sizeMark.write();
    }
    queryJobChannel().queryJobComm().send(mb, dst, mpTag);
}
virtual void preStart(size32_t parentExtractSz, const byte *parentExtract)
{
    CSortBaseActivityMaster::preStart(parentExtractSz, parentExtract);
    ActPrintLog("preStart");
    imaster = CreateThorSorterMaster(this);
    unsigned s=0;
    for (; s<container.queryJob().querySlaves(); s++)
    {
        SocketEndpoint ep;
        ep.deserialize(queryInitializationData(s)); // this is a bit of a Kludge until we get proper MP Thor
        imaster->AddSlave(&queryJobChannel().queryJobComm(), s+1, ep, mpTagRPC);
    }
}
virtual void process()
{
    if (totalCountKnown)
        return;
    if (container.queryLocalOrGrouped())
        return;
    totalCount = ::getCount(*this, container.queryJob().querySlaves(), stopAfter, mpTag);
    if (totalCount > stopAfter)
        totalCount = stopAfter;
    CMessageBuffer msg;
    msg.append(totalCount);
    if (!queryJobChannel().queryJobComm().send(msg, 1, mpTag, 5000))
        throw MakeThorException(0, "Failed to give result to slave");
}
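// Receives the running per-set tallies from the previous rank; pairs with
// sendTallies() above, which forwards them downstream.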
void getTallies() // NB: not called on first node.
{
    CMessageBuffer msg;
    if (!receiveMsg(msg, queryJobChannel().queryMyRank()-1, mpTag))
        return;
    memcpy(tallies, msg.readDirect(numSets*sizeof(unsigned)), numSets*sizeof(unsigned));
#if THOR_TRACE_LEVEL >= 5
    StringBuffer s;
    unsigned idx=0;
    for (; idx<numSets; idx++)
        s.append("[").append(tallies[idx]).append("]");
    ActPrintLog("CHOOSESETS: Incoming count = %s", s.str());
#endif
}
virtual void process()
{
    Owned<IThorRowInterfaces> rowIf = createThorRowInterfaces(queryRowManager(), helper->queryOutputMeta(), queryId(), queryCodeContext());
    OwnedConstThorRow result = getAggregate(*this, container.queryJob().querySlaves(), *rowIf, *helper, mpTag);
    if (!result)
        return;
    CMessageBuffer msg;
    CMemoryRowSerializer mbs(msg);
    rowIf->queryRowSerializer()->serialize(mbs, (const byte *)result.get());
    if (!queryJobChannel().queryJobComm().send(msg, 1, mpTag, 5000))
        throw MakeThorException(0, "Failed to give result to slave");
}
void process()
{
    start();
    processed = THORDATALINK_STARTED; // resets the count and marks the link started
    OwnedConstThorRow row = inputStream->ungroupedNextRow();
    CMessageBuffer mb;
    DelayedSizeMarker sizeMark(mb);
    if (row)
    {
        CMemoryRowSerializer msz(mb);
        ::queryRowSerializer(input)->serialize(msz, (const byte *)row.get());
        sizeMark.write();
        processed++;
    }
    queryJobChannel().queryJobComm().send(mb, 0, masterMpTag);
}
void process()
{
    CWorkUnitWriteMasterBase::process();
    unsigned nslaves = container.queryJob().querySlaves();
    CMessageBuffer mb;
    unsigned s=0;
    for (; s<nslaves; s++)
    {
        loop
        {
            if (!queryJobChannel().queryJobComm().send(mb, s+1, mpTag))
                return;
            if (!receiveMsg(mb, s+1, mpTag))
                return;
            if (0 == mb.length())
                break;
            unsigned numGot;
            mb.read(numGot);
            unsigned l = mb.remaining();
            if (workunitWriteLimit && totalSize+resultData.length()+l > workunitWriteLimit)
            {
                StringBuffer errMsg("Dataset too large to output to workunit (limit is set to ");
                errMsg.append(workunitWriteLimit/0x100000).append(" megabytes), in result (");
                if (resultName.length())
                    errMsg.append("name=").append(resultName);
                else
                    errMsg.append("sequence=").append(resultSeq);
                errMsg.append(")");
                throw MakeThorException(TE_WorkUnitWriteLimitExceeded, "%s", errMsg.str());
            }
            resultData.append(l, mb.readDirect(l));
            mb.clear();
            numResults += numGot;
            if (-1 != flushThreshold && resultData.length() >= (unsigned)flushThreshold)
                flushResults();
        }
    }
    flushResults(true);
}
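// Master-side gather: sum per-slave row counts as they arrive, stopping early once
// the limit is exceeded (any remaining slave messages are left unread here).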
rowcount_t aggregateToLimit()
{
    rowcount_t total = 0;
    unsigned slaves = container.queryJob().querySlaves();
    unsigned s;
    for (s=0; s<slaves; s++)
    {
        CMessageBuffer msg;
        rank_t sender;
        if (!receiveMsg(msg, RANK_ALL, mpTag, &sender))
            return 0;
        if (abortSoon)
            return 0;
        rowcount_t count;
        msg.read(count);
        total += count;
        if (total > limit)
            break;
    }
    return total;
}
void process()
{
    unsigned slaves = container.queryJob().querySlaves();
    IHThorLimitArg *helper = (IHThorLimitArg *)queryHelper();
    rowcount_t rowLimit = (rowcount_t)helper->getRowLimit();
    rowcount_t total = 0;
    while (slaves--)
    {
        CMessageBuffer mb;
        if (!receiveMsg(mb, RANK_ALL, mpTag, NULL))
            return;
        if (abortSoon)
            return;
        rowcount_t count;
        mb.read(count);
        total += count;
        if (total > rowLimit)
            break;
    }
    switch (container.getKind())
    {
        case TAKcreaterowlimit:
        case TAKskiplimit:
        {
            CMessageBuffer mb;
            mb.append(total);
            queryJobChannel().queryJobComm().send(mb, RANK_ALL_OTHER, mpTag);
            break;
        }
        case TAKlimit:
        {
            if (total > rowLimit)
                helper->onLimitExceeded();
            break;
        }
    }
}
bool sendLoopingCount(unsigned n, unsigned emptyIterations)
{
    if (!container.queryLocalOrGrouped())
    {
        if (global || (lastMaxEmpty && (0 == emptyIterations)) || (!lastMaxEmpty && (emptyIterations>maxEmptyLoopIterations)) || ((0 == n) && (0 == emptyIterations)))
        {
            CMessageBuffer msg; // inform master starting
            msg.append(n);
            msg.append(emptyIterations);
            queryJobChannel().queryJobComm().send(msg, 0, mpTag);
            if (!global)
            {
                lastMaxEmpty = emptyIterations>maxEmptyLoopIterations;
                return true;
            }
            receiveMsg(msg, 0, mpTag);
            bool ok;
            msg.read(ok);
            return ok;
        }
    }
    return true;
}
void CDiskWriteSlaveActivityBase::abort()
{
    ProcessSlaveActivity::abort();
    if (!rfsQueryParallel && dlfn.isExternal() && !firstNode())
        cancelReceiveMsg(queryJobChannel().queryMyRank()-1, mpTag);
}
virtual void init(MemoryBuffer &data, MemoryBuffer &slaveData) override
{
    mpTagRPC = container.queryJobChannel().deserializeMPTag(data);
    mptag_t barrierTag = container.queryJobChannel().deserializeMPTag(data);
    barrier.setown(container.queryJobChannel().createBarrier(barrierTag));
    portbase = allocPort(NUMSLAVEPORTS);
    ActPrintLog("MSortSlaveActivity::init portbase = %d, mpTagRPC = %d", portbase, (int)mpTagRPC);
    server.setLocalHost(portbase);
    helper = (IHThorSortArg *)queryHelper();
    sorter.setown(CreateThorSorter(this, server, &container.queryJob().queryIDiskUsage(), &queryJobChannel().queryJobComm(), mpTagRPC));
    server.serialize(slaveData);
}
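// NB: for non-parallel external writes, open() below serialises the nodes: each rank
// waits for the previous rank's row count and temp filename before it starts writing.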
void CDiskWriteSlaveActivityBase::open()
{
    if (dlfn.isExternal() && !firstNode())
    {
        input.setown(createDataLinkSmartBuffer(this, inputs.item(0), PROCESS_SMART_BUFFER_SIZE, isSmartBufferSpillNeeded(this), grouped, RCUNBOUND, NULL, false, &container.queryJob().queryIDiskUsage()));
        startInput(input);
        if (!rfsQueryParallel)
        {
            ActPrintLog("Blocked, waiting for previous part to complete write");
            CMessageBuffer msg;
            if (!receiveMsg(msg, queryJobChannel().queryMyRank()-1, mpTag))
                return;
            rowcount_t prevRows;
            msg.read(prevRows);
            msg.read(tempExternalName); // reuse temp filename, last node will rename
            ActPrintLog("Previous write row count = %" RCPF "d", prevRows);
        }
    }
    else
    {
        input.set(inputs.item(0));
        startInput(input);
    }
    processed = THORDATALINK_STARTED;

    bool extend = 0 != (diskHelperBase->getFlags() & TDWextend);
    if (extend)
        ActPrintLog("Extending file %s", fName.get());

    /* Fixed length record size is used when outputting compressed stream to determine run-length compression vs default LZW compression.
     * NB: only for FLAT files, not CSV or XML
     */
    size32_t diskRowMinSz = 0;
    IOutputMetaData *diskRowMeta = diskHelperBase->queryDiskRecordSize()->querySerializedDiskMeta();
    if (diskRowMeta->isFixedSize() && (TAKdiskwrite == container.getKind()))
    {
        diskRowMinSz = diskRowMeta->getMinRecordSize();
        if (grouped)
            diskRowMinSz += 1;
    }

    calcFileCrc = true;

    bool external = dlfn.isExternal();
    bool query = dlfn.isQuery();
    if (query && compress)
        UNIMPLEMENTED;

    unsigned twFlags = external ? TW_External : 0;
    if (query || (external && !firstNode()))
        twFlags |= TW_Direct;
    if (!external || (!query && lastNode()))
        twFlags |= TW_RenameToPrimary;
    if (extend || (external && !query))
        twFlags |= TW_Extend;

    Owned<IFileIO> iFileIO = createMultipleWrite(this, *partDesc, diskRowMinSz, twFlags, compress, ecomp, this, &abortSoon, (external && !query) ? &tempExternalName : NULL);

    if (compress)
    {
        ActPrintLog("Performing row compression on output file: %s", fName.get());
        // NB: block compressed output has implicit crc of 0, no need to calculate in row writer.
        calcFileCrc = false;
    }
    Owned<IFileIOStream> stream;
    if (wantRaw())
    {
        outraw.setown(createBufferedIOStream(iFileIO));
        stream.set(outraw);
    }
    else
    {
        stream.setown(createIOStream(iFileIO));
        unsigned rwFlags = 0;
        if (grouped)
            rwFlags |= rw_grouped;
        if (calcFileCrc)
            rwFlags |= rw_crc;
        out.setown(createRowWriter(stream, ::queryRowInterfaces(input), rwFlags));
    }
    if (extend || (external && !query))
        stream->seek(0, IFSend);
    ActPrintLog("Created output stream for %s", fName.get());
}
virtual void process() override
{
    ActPrintLog("INDEXWRITE: Start");
    init();
    IRowStream *stream = inputStream;
    ThorDataLinkMetaInfo info;
    input->getMetaInfo(info);
    outRowAllocator.setown(getRowAllocator(helper->queryDiskRecordSize()));
    start();
    if (refactor)
    {
        assertex(isLocal);
        if (active)
        {
            unsigned targetWidth = partDesc->queryOwner().numParts()-(buildTlk?1:0);
            assertex(0 == container.queryJob().querySlaves() % targetWidth);
            unsigned partsPerNode = container.queryJob().querySlaves() / targetWidth;
            unsigned myPart = queryJobChannel().queryMyRank();

            IArrayOf<IRowStream> streams;
            streams.append(*LINK(stream));
            --partsPerNode;

            // Should this be merging 1,11,21,31 etc.
            unsigned p=0;
            unsigned fromPart = targetWidth+1 + (partsPerNode * (myPart-1));
            for (; p<partsPerNode; p++)
            {
                streams.append(*createRowStreamFromNode(*this, fromPart++, queryJobChannel().queryJobComm(), mpTag, abortSoon));
            }
            ICompare *icompare = helper->queryCompare();
            assertex(icompare);
            Owned<IRowLinkCounter> linkCounter = new CThorRowLinkCounter;
            myInputStream.setown(createRowStreamMerger(streams.ordinality(), streams.getArray(), icompare, false, linkCounter));
            stream = myInputStream;
        }
        else // serve nodes, creating merged parts
            rowServer.setown(createRowServer(this, stream, queryJobChannel().queryJobComm(), mpTag));
    }
    processed = THORDATALINK_STARTED;

    // single part key support
    // has to serially pull all data from nodes 2-N
    // nodes 2-N, could/should start pushing some data (as it's supposed to be small) to cut down on serial nature.
    unsigned node = queryJobChannel().queryMyRank();
    if (singlePartKey)
    {
        if (1 == node)
        {
            try
            {
                open(*partDesc, false, helper->queryDiskRecordSize()->isVariableSize());
                loop
                {
                    OwnedConstThorRow row = inputStream->ungroupedNextRow();
                    if (!row)
                        break;
                    if (abortSoon)
                        return;
                    processRow(row);
                }

                unsigned node = 2;
                while (node <= container.queryJob().querySlaves())
                {
                    Linked<IOutputRowDeserializer> deserializer = ::queryRowDeserializer(input);
                    CMessageBuffer mb;
                    Owned<ISerialStream> stream = createMemoryBufferSerialStream(mb);
                    CThorStreamDeserializerSource rowSource;
                    rowSource.setStream(stream);
                    bool successSR;
                    loop
                    {
                        {
                            BooleanOnOff tf(receivingTag2);
                            successSR = queryJobChannel().queryJobComm().sendRecv(mb, node, mpTag2);
                        }
                        if (successSR)
                        {
                            if (rowSource.eos())
                                break;
                            Linked<IEngineRowAllocator> allocator = ::queryRowAllocator(input);
                            do
                            {
                                RtlDynamicRowBuilder rowBuilder(allocator);
                                size32_t sz = deserializer->deserialize(rowBuilder, rowSource);
                                OwnedConstThorRow fRow = rowBuilder.finalizeRowClear(sz);
                                processRow(fRow);
                            }
                            while (!rowSource.eos());
                        }
                    }
                    node++;
                }
            }
            catch (CATCHALL)
            {
                close(*partDesc, partCrc, true);
                throw;
            }
            close(*partDesc, partCrc, true);
            doStopInput();
        }
        else
        {
            CMessageBuffer mb;
            CMemoryRowSerializer mbs(mb);
            Linked<IOutputRowSerializer> serializer = ::queryRowSerializer(input);
            loop
            {
                BooleanOnOff tf(receivingTag2);
                if (queryJobChannel().queryJobComm().recv(mb, 1, mpTag2)) // node 1 asking for more..
                {
                    if (abortSoon)
                        break;
                    mb.clear();
                    do
                    {
                        OwnedConstThorRow row = inputStream->ungroupedNextRow();
                        if (!row)
                            break;
                        serializer->serialize(mbs, (const byte *)row.get());
                    }
                    while (mb.length() < SINGLEPART_KEY_TRANSFER_SIZE); // NB: at least one row
                    if (!queryJobChannel().queryJobComm().reply(mb))
                        throw MakeThorException(0, "Failed to send index data to node 1, from node %d", node);
                    if (0 == mb.length())
                        break;
                }
            }
        }
    }
}
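// Builds the part-lookup filter for an index read and, for superfiles of sorted keys,
// cross-checks TLK crcs to verify the sub-keys appear to be co-partitioned.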
void prepareKey(IDistributedFile *index)
{
    IDistributedFile *f = index;
    IDistributedSuperFile *super = f->querySuperFile();

    unsigned nparts = f->numParts(); // includes tlks if any, but unused in array
    performPartLookup.ensure(nparts);

    bool checkTLKConsistency = (nullptr != super) && !localKey && (0 != (TIRsorted & indexBaseHelper->getFlags()));
    if (nofilter)
    {
        while (nparts--)
            performPartLookup.append(true);
        if (!checkTLKConsistency)
            return;
    }
    else
    {
        while (nparts--)
            performPartLookup.append(false); // parts to perform lookup set later
    }

    Owned<IDistributedFileIterator> iter;
    if (super)
    {
        iter.setown(super->getSubFileIterator(true));
        verifyex(iter->first());
        f = &iter->query();
    }
    unsigned width = f->numParts();
    if (!localKey)
        --width;
    assertex(width);
    unsigned tlkCrc = 0;
    bool first = true;
    unsigned superSubIndex=0;
    bool fileCrc = false, rowCrc = false;
    for (;;)
    {
        Owned<IDistributedFilePart> part = f->getPart(width);
        if (checkTLKConsistency)
        {
            unsigned _tlkCrc;
            if (part->getCrc(_tlkCrc))
                fileCrc = true;
            else if (part->queryAttributes().hasProp("@crc")) // NB: key "@crc" is not a crc on the file, but data within.
            {
                _tlkCrc = part->queryAttributes().getPropInt("@crc");
                rowCrc = true;
            }
            else if (part->queryAttributes().hasProp("@tlkCrc")) // backward compat.
            {
                _tlkCrc = part->queryAttributes().getPropInt("@tlkCrc");
                rowCrc = true;
            }
            else
            {
                if (rowCrc || fileCrc)
                {
                    checkTLKConsistency = false;
                    Owned<IException> e = MakeActivityWarning(&container, 0, "Cannot validate that tlks in superfile %s match, some crc attributes are missing", super->queryLogicalName());
                    queryJobChannel().fireException(e);
                }
            }
            if (rowCrc && fileCrc)
            {
                checkTLKConsistency = false;
                Owned<IException> e = MakeActivityWarning(&container, 0, "Cannot validate that tlks in superfile %s match, due to mixed crc types.", super->queryLogicalName());
                queryJobChannel().fireException(e);
            }
            if (checkTLKConsistency)
            {
                if (first)
                {
                    tlkCrc = _tlkCrc;
                    first = false;
                }
                else if (tlkCrc != _tlkCrc)
                    throw MakeActivityException(this, 0, "Sorted output on super files comprised of non co-partitioned sub keys is not supported (TLK's do not match)");
            }
        }
        if (!nofilter)
        {
            Owned<IKeyIndex> keyIndex;
            unsigned copy;
            for (copy=0; copy<part->numCopies(); copy++)
            {
                RemoteFilename rfn;
                OwnedIFile ifile = createIFile(part->getFilename(rfn, copy));
                if (ifile->exists())
                {
                    StringBuffer remotePath;
                    rfn.getRemotePath(remotePath);
                    unsigned crc = 0;
                    part->getCrc(crc);
                    keyIndex.setown(createKeyIndex(remotePath.str(), crc, false, false));
                    break;
                }
            }
            if (!keyIndex)
                throw MakeThorException(TE_FileNotFound, "Top level key part does not exist, for key: %s", index->queryLogicalName());

            unsigned fixedSize = indexBaseHelper->queryDiskRecordSize()->querySerializedDiskMeta()->getFixedSize(); // used only if fixed
            Owned<IKeyManager> tlk = createLocalKeyManager(keyIndex, fixedSize, NULL);
            indexBaseHelper->createSegmentMonitors(tlk);
            tlk->finishSegmentMonitors();
            tlk->reset();
            while (tlk->lookup(false))
            {
                if (tlk->queryFpos())
                    performPartLookup.replace(true, (aindex_t)(super ? super->numSubFiles(true)*(tlk->queryFpos()-1)+superSubIndex : tlk->queryFpos()-1));
            }
        }
        if (!super || !iter->next())
            break;
        superSubIndex++;
        f = &iter->query();
        if (width != f->numParts()-1)
            throw MakeActivityException(this, 0, "Super key %s with a mixture of sub key widths is not supported.", f->queryLogicalName());
    }
}
virtual void process()
{
    ActPrintLog("GlobalMergeActivityMaster::process");
    CMasterActivity::process();

    IHThorMergeArg *helper = (IHThorMergeArg *)queryHelper();
    Owned<IThorRowInterfaces> rowif = createRowInterfaces(helper->queryOutputMeta());
    CThorKeyArray sample(*this, rowif, helper->querySerialize(), helper->queryCompare(), helper->queryCompareKey(), helper->queryCompareRowKey());
    unsigned n = container.queryJob().querySlaves();
    // vectors ensure the tag arrays are freed on all return paths (including the early
    // returns below, which previously would have leaked heap-allocated arrays)
    std::vector<mptag_t> replytags(n, TAG_NULL);
    std::vector<mptag_t> intertags(n, TAG_NULL);
    unsigned i;
    try
    {
        for (i=0; i<n; i++)
        {
            if (abortSoon)
                return;
            CMessageBuffer mb;
#ifdef _TRACE
            ActPrintLog("Merge process, Receiving on tag %d", replyTag);
#endif
            rank_t sender;
            if (!receiveMsg(mb, RANK_ALL, replyTag, &sender) || abortSoon)
                return;
#ifdef _TRACE
            ActPrintLog("Merge process, Received sample from %d", sender);
#endif
            sender--;
            assertex((unsigned)sender < n);
            assertex(replytags[(unsigned)sender] == TAG_NULL);
            deserializeMPtag(mb, replytags[(unsigned)sender]);
            deserializeMPtag(mb, intertags[(unsigned)sender]);
            sample.deserialize(mb, true);
        }
        ActPrintLog("GlobalMergeActivityMaster::process samples merged");
        sample.createSortedPartition(n);
        ActPrintLog("GlobalMergeActivityMaster::process partition generated");
        for (i=0; i<n; i++)
        {
            if (abortSoon)
                break;
            CMessageBuffer mb;
            mb.append(n);
            for (unsigned j=0; j<n; j++)
                serializeMPtag(mb, intertags[j]);
            sample.serialize(mb);
#ifdef _TRACE
            ActPrintLog("Merge process, Replying to node %d tag %d", i+1, replytags[i]);
#endif
            if (!queryJobChannel().queryJobComm().send(mb, (rank_t)i+1, replytags[i]))
                break;
        }
    }
    catch (IException *e)
    {
        ActPrintLog(e, "MERGE");
        throw;
    }
    ActPrintLog("GlobalMergeActivityMaster::process exit");
}