virtual void init() { HashDistributeMasterBase::init(); // JCSMORE should common up some with indexread IHThorKeyedDistributeArg *helper = (IHThorKeyedDistributeArg *)queryHelper(); StringBuffer scoped; OwnedRoxieString indexFileName(helper->getIndexFileName()); queryThorFileManager().addScope(container.queryJob(), indexFileName, scoped); file.setown(queryThorFileManager().lookup(container.queryJob(), indexFileName)); if (!file) throw MakeActivityException(this, 0, "KeyedDistribute: Failed to find key: %s", scoped.str()); if (0 == file->numParts()) throw MakeActivityException(this, 0, "KeyedDistribute: Can't distribute based on an empty key: %s", scoped.str()); checkFormatCrc(this, file, helper->getFormatCrc(), true); Owned<IFileDescriptor> fileDesc = file->getFileDescriptor(); Owned<IPartDescriptor> tlkDesc = fileDesc->getPart(fileDesc->numParts()-1); if (!tlkDesc->queryProperties().hasProp("@kind") || 0 != stricmp("topLevelKey", tlkDesc->queryProperties().queryProp("@kind"))) throw MakeActivityException(this, 0, "Cannot distribute using a non-distributed key: '%s'", scoped.str()); unsigned location; OwnedIFile iFile; StringBuffer filePath; if (!getBestFilePart(this, *tlkDesc, iFile, location, filePath)) throw MakeThorException(TE_FileNotFound, "Top level key part does not exist, for key: %s", file->queryLogicalName()); OwnedIFileIO iFileIO = iFile->open(IFOread); assertex(iFileIO); tlkMb.append(iFileIO->size()); ::read(iFileIO, 0, (size32_t)iFileIO->size(), tlkMb); queryThorFileManager().noteFileRead(container.queryJob(), file); }
void sendPartialCount(CSlaveActivity &activity, rowcount_t partialCount) { CMessageBuffer msg; msg.append(partialCount); if (!activity.queryContainer().queryJob().queryJobComm().send(msg, 0, activity.queryMpTag(), 5000)) throw MakeThorException(0, "Failed to give partial result to master"); }
virtual void validateFile(IDistributedFile *file) { IHThorDiskReadBaseArg *helper = (IHThorDiskReadBaseArg *)queryHelper(); bool codeGenGrouped = 0 != (TDXgrouped & helper->getFlags()); bool isGrouped = fileDesc->isGrouped(); if (isGrouped != codeGenGrouped) { Owned<IException> e = MakeActivityWarning(&container, TE_GroupMismatch, "DFS and code generated group info. differs: DFS(%s), CodeGen(%s), using DFS info", isGrouped?"grouped":"ungrouped", codeGenGrouped?"grouped":"ungrouped"); queryJobChannel().fireException(e); } IOutputMetaData *recordSize = helper->queryDiskRecordSize()->querySerializedDiskMeta(); if (recordSize->isFixedSize()) // fixed size { if (0 != fileDesc->queryProperties().getPropInt("@recordSize")) { size32_t rSz = fileDesc->queryProperties().getPropInt("@recordSize"); if (isGrouped) rSz--; // eog byte not to be included in this test. if (rSz != recordSize->getMinRecordSize()) throw MakeThorException(TE_RecordSizeMismatch, "Published record size %d for file %s, does not match coded record size %d", rSz, fileName.get(), recordSize->getMinRecordSize()); } if (!fileDesc->isCompressed() && (TDXcompress & helper->getFlags())) { size32_t rSz = recordSize->getMinRecordSize(); if (isGrouped) rSz++; if (rSz >= MIN_ROWCOMPRESS_RECSIZE) { Owned<IException> e = MakeActivityWarning(&container, TE_CompressionMismatch, "Ignoring compression attribute on file '%s', which is not published as compressed in DFS", fileName.get()); queryJobChannel().fireException(e); } } } if (0 == (TDRnocrccheck & helper->getFlags())) checkFormatCrc(this, file, helper->getFormatCrc(), false); }
void CPartialResultAggregator::sendResult(const void *row) { CMessageBuffer mb; if (row) { CMemoryRowSerializer mbs(mb); activity.queryRowSerializer()->serialize(mbs,(const byte *)row); } if (!activity.queryContainer().queryJob().queryJobComm().send(mb, 0, activity.queryMpTag(), 5000)) throw MakeThorException(0, "Failed to give partial result to master"); }
virtual unsigned hash(const void *data) { if (1 == count) return offsetTable[0].index; offset_t fpos = iFetchHandler.extractFpos(data); if (isLocalFpos(fpos)) return getLocalFposPart(fpos); const void *result = bsearch(&fpos, offsetTable, count, sizeof(FPosTableEntry), slaveLookup); if (!result) throw MakeThorException(TE_FetchOutOfRange, "FETCH: Offset not found in offset table; fpos=%"I64F"d", fpos); return ((FPosTableEntry *)result)->index; }
virtual void process() { if (totalCountKnown) return; if (container.queryLocalOrGrouped()) return; totalCount = ::getCount(*this, container.queryJob().querySlaves(), stopAfter, mpTag); if (totalCount > stopAfter) totalCount = stopAfter; CMessageBuffer msg; msg.append(totalCount); if (!queryJobChannel().queryJobComm().send(msg, 1, mpTag, 5000)) throw MakeThorException(0, "Failed to give result to slave"); }
virtual void process() { CMasterActivity::process(); bool results = false; unsigned nslaves = container.queryJob().querySlaves(); while (nslaves--) { CMessageBuffer mb; if (abortSoon || !receiveMsg(mb, RANK_ALL, replyTag, NULL)) break; StringBuffer str; mb.getSender().getUrlStr(str); size32_t sz; mb.read(sz); if (sz) { if (results) throw MakeThorException(TE_UnexpectedMultipleSlaveResults, "Received greater than one result from slaves"); IHThorRemoteResultArg *helper = (IHThorRemoteResultArg *)queryHelper(); Owned<IThorRowInterfaces> resultRowIf = createRowInterfaces(helper->queryOutputMeta()); CThorStreamDeserializerSource mds(sz, mb.readDirect(sz)); RtlDynamicRowBuilder rowBuilder(resultRowIf->queryRowAllocator()); size32_t sz = resultRowIf->queryRowDeserializer()->deserialize(rowBuilder, mds); OwnedConstThorRow result = rowBuilder.finalizeRowClear(sz); helper->sendResult(result); results = true; } } if (!results && !abortSoon) { ActPrintLog("WARNING: no results"); IHThorRemoteResultArg *helper = (IHThorRemoteResultArg *)queryHelper(); //helper->sendResult(NULL); // Jake I think this always cores (so raise exception instead) throw MakeThorException(TE_UnexpectedMultipleSlaveResults, "Received no results from slaves"); } }
virtual void process() { IRecordSize *recordSize = helper->queryOutputMeta(); Owned<IThorRowInterfaces> rowIf = createThorRowInterfaces(queryRowManager(), helper->queryOutputMeta(), queryId(), queryCodeContext()); OwnedConstThorRow result = getAggregate(*this, container.queryJob().querySlaves(), *rowIf, *helper, mpTag); if (!result) return; CMessageBuffer msg; CMemoryRowSerializer mbs(msg); rowIf->queryRowSerializer()->serialize(mbs, (const byte *)result.get()); if (!queryJobChannel().queryJobComm().send(msg, 1, mpTag, 5000)) throw MakeThorException(0, "Failed to give result to slave"); }
IRowStream * doGlobalSelfJoin() { #if THOR_TRACE_LEVEL > 5 ActPrintLog("SELFJOIN: Performing global self-join"); #endif sorter->Gather(::queryRowInterfaces(input), input, compare, NULL, NULL, keyserializer, NULL, false, isUnstable(), abortSoon, NULL); stopInput(input); input = NULL; if(abortSoon) { barrier->cancel(); return NULL; } if (!barrier->wait(false)) { Sleep(1000); // let original error through throw MakeThorException(TE_BarrierAborted,"SELFJOIN: Barrier Aborted"); } rowcount_t totalrows; return sorter->startMerge(totalrows); }
void process() { CWorkUnitWriteMasterBase::process(); unsigned nslaves = container.queryJob().querySlaves(); CMessageBuffer mb; unsigned s=0; for (; s<nslaves; s++) { loop { if (!container.queryJob().queryJobComm().send(mb, s+1, mpTag)) return; if (!receiveMsg(mb, s+1, mpTag)) return; if (0 == mb.length()) break; unsigned numGot; mb.read(numGot); unsigned l=mb.remaining(); if (workunitWriteLimit && totalSize+resultData.length()+l > workunitWriteLimit) { StringBuffer errMsg("Dataset too large to output to workunit (limit is set to "); errMsg.append(workunitWriteLimit/0x100000).append(") megabytes, in result ("); if (resultName.length()) errMsg.append("name=").append(resultName); else errMsg.append("sequence=").append(resultSeq); errMsg.append(")"); throw MakeThorException(TE_WorkUnitWriteLimitExceeded, "%s", errMsg.str()); } resultData.append(l, mb.readDirect(l)); mb.clear(); numResults += numGot; if (-1 != flushThreshold && resultData.length() >= (unsigned)flushThreshold) flushResults(); } } flushResults(true); }
virtual void process() override { ActPrintLog("INDEXWRITE: Start"); init(); IRowStream *stream = inputStream; ThorDataLinkMetaInfo info; input->getMetaInfo(info); outRowAllocator.setown(getRowAllocator(helper->queryDiskRecordSize())); start(); if (refactor) { assertex(isLocal); if (active) { unsigned targetWidth = partDesc->queryOwner().numParts()-(buildTlk?1:0); assertex(0 == container.queryJob().querySlaves() % targetWidth); unsigned partsPerNode = container.queryJob().querySlaves() / targetWidth; unsigned myPart = queryJobChannel().queryMyRank(); IArrayOf<IRowStream> streams; streams.append(*LINK(stream)); --partsPerNode; // Should this be merging 1,11,21,31 etc. unsigned p=0; unsigned fromPart = targetWidth+1 + (partsPerNode * (myPart-1)); for (; p<partsPerNode; p++) { streams.append(*createRowStreamFromNode(*this, fromPart++, queryJobChannel().queryJobComm(), mpTag, abortSoon)); } ICompare *icompare = helper->queryCompare(); assertex(icompare); Owned<IRowLinkCounter> linkCounter = new CThorRowLinkCounter; myInputStream.setown(createRowStreamMerger(streams.ordinality(), streams.getArray(), icompare, false, linkCounter)); stream = myInputStream; } else // serve nodes, creating merged parts rowServer.setown(createRowServer(this, stream, queryJobChannel().queryJobComm(), mpTag)); } processed = THORDATALINK_STARTED; // single part key support // has to serially pull all data fron nodes 2-N // nodes 2-N, could/should start pushing some data (as it's supposed to be small) to cut down on serial nature. unsigned node = queryJobChannel().queryMyRank(); if (singlePartKey) { if (1 == node) { try { open(*partDesc, false, helper->queryDiskRecordSize()->isVariableSize()); loop { OwnedConstThorRow row = inputStream->ungroupedNextRow(); if (!row) break; if (abortSoon) return; processRow(row); } unsigned node = 2; while (node <= container.queryJob().querySlaves()) { Linked<IOutputRowDeserializer> deserializer = ::queryRowDeserializer(input); CMessageBuffer mb; Owned<ISerialStream> stream = createMemoryBufferSerialStream(mb); CThorStreamDeserializerSource rowSource; rowSource.setStream(stream); bool successSR; loop { { BooleanOnOff tf(receivingTag2); successSR = queryJobChannel().queryJobComm().sendRecv(mb, node, mpTag2); } if (successSR) { if (rowSource.eos()) break; Linked<IEngineRowAllocator> allocator = ::queryRowAllocator(input); do { RtlDynamicRowBuilder rowBuilder(allocator); size32_t sz = deserializer->deserialize(rowBuilder, rowSource); OwnedConstThorRow fRow = rowBuilder.finalizeRowClear(sz); processRow(fRow); } while (!rowSource.eos()); } } node++; } } catch (CATCHALL) { close(*partDesc, partCrc, true); throw; } close(*partDesc, partCrc, true); doStopInput(); } else { CMessageBuffer mb; CMemoryRowSerializer mbs(mb); Linked<IOutputRowSerializer> serializer = ::queryRowSerializer(input); loop { BooleanOnOff tf(receivingTag2); if (queryJobChannel().queryJobComm().recv(mb, 1, mpTag2)) // node 1 asking for more.. { if (abortSoon) break; mb.clear(); do { OwnedConstThorRow row = inputStream->ungroupedNextRow(); if (!row) break; serializer->serialize(mbs, (const byte *)row.get()); } while (mb.length() < SINGLEPART_KEY_TRANSFER_SIZE); // NB: at least one row if (!queryJobChannel().queryJobComm().reply(mb)) throw MakeThorException(0, "Failed to send index data to node 1, from node %d", node); if (0 == mb.length()) break; } } } }
void init(MemoryBuffer &data, MemoryBuffer &slaveData) { isLocal = 0 != (TIWlocal & helper->getFlags()); mpTag = container.queryJob().deserializeMPTag(data); mpTag2 = container.queryJob().deserializeMPTag(data); data.read(active); if (active) { data.read(logicalFilename); partDesc.setown(deserializePartFileDescriptor(data)); } data.read(singlePartKey); data.read(refactor); if (singlePartKey) buildTlk = false; else { data.read(buildTlk); if (firstNode()) { if (buildTlk) tlkDesc.setown(deserializePartFileDescriptor(data)); else if (!isLocal) // exising tlk then.. { OwnedRoxieString diName(helper->getDistributeIndexName()); assertex(diName.get()); tlkDesc.setown(deserializePartFileDescriptor(data)); unsigned c; data.read(c); while (c--) { RemoteFilename rf; rf.deserialize(data); if (!existingTlkIFile) { Owned<IFile> iFile = createIFile(rf); if (iFile->exists()) existingTlkIFile.set(iFile); } } if (!existingTlkIFile) throw MakeThorException(TE_FileNotFound, "Top level key part does not exist, for key: %s", diName.get()); } } } IOutputMetaData * diskSize = helper->queryDiskRecordSize(); assertex(!(diskSize->getMetaFlags() & MDFneedserializedisk)); if (diskSize->isVariableSize()) { if (TIWmaxlength & helper->getFlags()) maxDiskRecordSize = helper->getMaxKeySize(); else maxDiskRecordSize = KEYBUILD_MAXLENGTH; //Current default behaviour, could be improved in the future } else maxDiskRecordSize = diskSize->getFixedSize(); reportOverflow = false; }
void start() { ActivityTimer s(totalCycles, timeActivities); input = inputs.item(0); try { try { startInput(input); } catch (IException *e) { fireException(e); barrier->cancel(); throw; } catch (CATCHALL) { Owned<IException> e = MakeActivityException(this, 0, "Unknown exception starting sort input"); fireException(e); barrier->cancel(); throw; } dataLinkStart(); Linked<IRowInterfaces> rowif = queryRowInterfaces(input); Owned<IRowInterfaces> auxrowif = createRowInterfaces(helper->querySortedRecordSize(),queryActivityId(),queryCodeContext()); sorter->Gather( rowif, input, helper->queryCompare(), helper->queryCompareLeftRight(), NULL,helper->querySerialize(), NULL, false, isUnstable(), abortSoon, auxrowif); stopInput(input); input = NULL; if (abortSoon) { ActPrintLogEx(&queryContainer(), thorlog_null, MCwarning, "MSortSlaveActivity::start aborting"); barrier->cancel(); return; } } catch (IException *e) { fireException(e); barrier->cancel(); throw; } catch (CATCHALL) { Owned<IException> e = MakeActivityException(this, 0, "Unknown exception gathering sort input"); fireException(e); barrier->cancel(); throw; } ActPrintLog("SORT waiting barrier.1"); if (!barrier->wait(false)) { Sleep(1000); // let original error through throw MakeThorException(TE_BarrierAborted,"SORT: Barrier Aborted"); } ActPrintLog("SORT barrier.1 raised"); output.setown(sorter->startMerge(totalrows)); }
void CWriteMasterBase::publish() { if (published) return; published = true; if (!(diskHelperBase->getFlags() & (TDXtemporary|TDXjobtemp))) updateActivityResult(container.queryJob().queryWorkUnit(), diskHelperBase->getFlags(), diskHelperBase->getSequence(), fileName, recordsProcessed); IPropertyTree &props = fileDesc->queryProperties(); props.setPropInt64("@recordCount", recordsProcessed); if (0 == (diskHelperBase->getFlags() & TDXtemporary) || container.queryJob().queryUseCheckpoints()) { if (0 != (diskHelperBase->getFlags() & TDWexpires)) setExpiryTime(props, diskHelperBase->getExpiryDays()); if (TDWupdate & diskHelperBase->getFlags()) { unsigned eclCRC; unsigned __int64 totalCRC; diskHelperBase->getUpdateCRCs(eclCRC, totalCRC); props.setPropInt("@eclCRC", eclCRC); props.setPropInt64("@totalCRC", totalCRC); } } container.queryTempHandler()->registerFile(fileName, container.queryOwner().queryGraphId(), diskHelperBase->getTempUsageCount(), TDXtemporary & diskHelperBase->getFlags(), getDiskOutputKind(diskHelperBase->getFlags()), &clusters); if (!dlfn.isExternal()) { bool temporary = 0 != (diskHelperBase->getFlags()&TDXtemporary); if (!temporary && (queryJob().querySlaves() < fileDesc->numParts())) { // create empty parts for a fileDesc being published that is larger than this clusters size32_t recordSize = 0; IOutputMetaData *diskRowMeta = diskHelperBase->queryDiskRecordSize()->querySerializedDiskMeta(); if (diskRowMeta->isFixedSize() && (TAKdiskwrite == container.getKind())) { recordSize = diskRowMeta->getMinRecordSize(); if (0 != (diskHelperBase->getFlags() & TDXgrouped)) recordSize += 1; } unsigned compMethod = COMPRESS_METHOD_LZW; // rowdiff used if recordSize > 0, else fallback to compMethod if (getOptBool(THOROPT_COMP_FORCELZW, false)) { recordSize = 0; // by default if fixed length (recordSize set), row diff compression is used. This forces compMethod. compMethod = COMPRESS_METHOD_LZW; } else if (getOptBool(THOROPT_COMP_FORCEFLZ, false)) compMethod = COMPRESS_METHOD_FASTLZ; else if (getOptBool(THOROPT_COMP_FORCELZ4, false)) compMethod = COMPRESS_METHOD_LZ4; bool blockCompressed; bool compressed = fileDesc->isCompressed(&blockCompressed); for (unsigned clusterIdx=0; clusterIdx<fileDesc->numClusters(); clusterIdx++) { StringBuffer clusterName; fileDesc->getClusterGroupName(clusterIdx, clusterName, &queryNamedGroupStore()); PROGLOG("Creating blank parts for file '%s', cluster '%s'", fileName.get(), clusterName.str()); unsigned p=0; while (p<fileDesc->numParts()) { if (p == targetOffset) p += queryJob().querySlaves(); IPartDescriptor *partDesc = fileDesc->queryPart(p); CDateTime createTime, modifiedTime; for (unsigned c=0; c<partDesc->numCopies(); c++) { RemoteFilename rfn; partDesc->getFilename(c, rfn); StringBuffer path; rfn.getPath(path); try { ensureDirectoryForFile(path.str()); OwnedIFile iFile = createIFile(path.str()); Owned<IFileIO> iFileIO; if (compressed) iFileIO.setown(createCompressedFileWriter(iFile, recordSize, false, true, NULL, compMethod)); else iFileIO.setown(iFile->open(IFOcreate)); dbgassertex(iFileIO.get()); iFileIO.clear(); // ensure copies have matching datestamps, as they would do normally (backupnode expects it) if (partDesc->numCopies() > 1) { if (0 == c) iFile->getTime(&createTime, &modifiedTime, NULL); else iFile->setTime(&createTime, &modifiedTime, NULL); } } catch (IException *e) { if (0 == c) throw; Owned<IThorException> e2 = MakeThorException(e); e->Release(); e2->setAction(tea_warning); queryJob().fireException(e2); } } partDesc->queryProperties().setPropInt64("@size", 0); p++; } clusterIdx++; } } queryThorFileManager().publish(container.queryJob(), fileName, *fileDesc, NULL); } }
virtual void init() { CMasterActivity::init(); OwnedRoxieString indexFileName(helper->getIndexFileName()); Owned<IDistributedFile> dataFile; Owned<IDistributedFile> indexFile = queryThorFileManager().lookup(container.queryJob(), indexFileName, false, 0 != (helper->getJoinFlags() & JFindexoptional), true); unsigned keyReadWidth = (unsigned)container.queryJob().getWorkUnitValueInt("KJKRR", 0); if (!keyReadWidth || keyReadWidth>container.queryJob().querySlaves()) keyReadWidth = container.queryJob().querySlaves(); initMb.clear(); initMb.append(indexFileName.get()); if (helper->diskAccessRequired()) numTags += 2; initMb.append(numTags); unsigned t=0; for (; t<numTags; t++) { tags[t] = container.queryJob().allocateMPTag(); initMb.append(tags[t]); } bool keyHasTlk = false; if (indexFile) { unsigned numParts = 0; localKey = indexFile->queryAttributes().getPropBool("@local"); if (container.queryLocalData() && !localKey) throw MakeActivityException(this, 0, "Keyed Join cannot be LOCAL unless supplied index is local"); checkFormatCrc(this, indexFile, helper->getIndexFormatCrc(), true); Owned<IFileDescriptor> indexFileDesc = indexFile->getFileDescriptor(); IDistributedSuperFile *superIndex = indexFile->querySuperFile(); unsigned superIndexWidth = 0; unsigned numSuperIndexSubs = 0; if (superIndex) { numSuperIndexSubs = superIndex->numSubFiles(true); bool first=true; // consistency check Owned<IDistributedFileIterator> iter = superIndex->getSubFileIterator(true); ForEach(*iter) { IDistributedFile &f = iter->query(); unsigned np = f.numParts()-1; IDistributedFilePart &part = f.queryPart(np); const char *kind = part.queryAttributes().queryProp("@kind"); bool hasTlk = NULL != kind && 0 == stricmp("topLevelKey", kind); // if last part not tlk, then deemed local (might be singlePartKey) if (first) { first = false; keyHasTlk = hasTlk; superIndexWidth = f.numParts(); if (keyHasTlk) --superIndexWidth; } else { if (hasTlk != keyHasTlk) throw MakeActivityException(this, 0, "Local/Single part keys cannot be mixed with distributed(tlk) keys in keyedjoin"); if (keyHasTlk && superIndexWidth != f.numParts()-1) throw MakeActivityException(this, 0, "Super sub keys of different width cannot be mixed with distributed(tlk) keys in keyedjoin"); } } if (keyHasTlk) numParts = superIndexWidth * numSuperIndexSubs; else numParts = superIndex->numParts(); } else { numParts = indexFile->numParts(); if (numParts) { const char *kind = indexFile->queryPart(indexFile->numParts()-1).queryAttributes().queryProp("@kind"); keyHasTlk = NULL != kind && 0 == stricmp("topLevelKey", kind); if (keyHasTlk) --numParts; } } if (numParts) { initMb.append(numParts); initMb.append(superIndexWidth); // 0 if not superIndex initMb.append((superIndex && superIndex->isInterleaved()) ? numSuperIndexSubs : 0); unsigned p=0; UnsignedArray parts; for (; p<numParts; p++) parts.append(p); indexFileDesc->serializeParts(initMb, parts); if (localKey) keyHasTlk = false; // not used initMb.append(keyHasTlk); if (keyHasTlk) { if (numSuperIndexSubs) initMb.append(numSuperIndexSubs); else initMb.append((unsigned)1); Owned<IDistributedFileIterator> iter; IDistributedFile *f; if (superIndex) { iter.setown(superIndex->getSubFileIterator(true)); f = &iter->query(); } else f = indexFile; loop { unsigned location; OwnedIFile iFile; StringBuffer filePath; Owned<IFileDescriptor> fileDesc = f->getFileDescriptor(); Owned<IPartDescriptor> tlkDesc = fileDesc->getPart(fileDesc->numParts()-1); if (!getBestFilePart(this, *tlkDesc, iFile, location, filePath)) throw MakeThorException(TE_FileNotFound, "Top level key part does not exist, for key: %s", f->queryLogicalName()); OwnedIFileIO iFileIO = iFile->open(IFOread); assertex(iFileIO); size32_t tlkSz = (size32_t)iFileIO->size(); initMb.append(tlkSz); ::read(iFileIO, 0, tlkSz, initMb); if (!iter || !iter->next()) break; f = &iter->query(); } } if (helper->diskAccessRequired()) { OwnedRoxieString fetchFilename(helper->getFileName()); if (fetchFilename) { dataFile.setown(queryThorFileManager().lookup(container.queryJob(), fetchFilename, false, 0 != (helper->getFetchFlags() & FFdatafileoptional), true)); if (dataFile) { if (superIndex) throw MakeActivityException(this, 0, "Superkeys and full keyed joins are not supported"); Owned<IFileDescriptor> dataFileDesc = getConfiguredFileDescriptor(*dataFile); void *ekey; size32_t ekeylen; helper->getFileEncryptKey(ekeylen,ekey); bool encrypted = dataFileDesc->queryProperties().getPropBool("@encrypted"); if (0 != ekeylen) { memset(ekey,0,ekeylen); free(ekey); if (!encrypted) { Owned<IException> e = MakeActivityWarning(&container, TE_EncryptionMismatch, "Ignoring encryption key provided as file '%s' was not published as encrypted", helper->getFileName()); container.queryJob().fireException(e); } } else if (encrypted) throw MakeActivityException(this, 0, "File '%s' was published as encrypted but no encryption key provided", fetchFilename.get()); unsigned dataReadWidth = (unsigned)container.queryJob().getWorkUnitValueInt("KJDRR", 0); if (!dataReadWidth || dataReadWidth>container.queryJob().querySlaves()) dataReadWidth = container.queryJob().querySlaves(); Owned<IGroup> grp = container.queryJob().querySlaveGroup().subset((unsigned)0, dataReadWidth); dataFileMapping.setown(getFileSlaveMaps(dataFile->queryLogicalName(), *dataFileDesc, container.queryJob().queryUserDescriptor(), *grp, false, false, NULL)); dataFileMapping->serializeFileOffsetMap(offsetMapMb.clear()); } else indexFile.clear(); } } } else
void prepareKey(IDistributedFile *index) { IDistributedFile *f = index; IDistributedSuperFile *super = f->querySuperFile(); unsigned nparts = f->numParts(); // includes tlks if any, but unused in array performPartLookup.ensure(nparts); bool checkTLKConsistency = (nullptr != super) && !localKey && (0 != (TIRsorted & indexBaseHelper->getFlags())); if (nofilter) { while (nparts--) performPartLookup.append(true); if (!checkTLKConsistency) return; } else { while (nparts--) performPartLookup.append(false); // parts to perform lookup set later } Owned<IDistributedFileIterator> iter; if (super) { iter.setown(super->getSubFileIterator(true)); verifyex(iter->first()); f = &iter->query(); } unsigned width = f->numParts(); if (!localKey) --width; assertex(width); unsigned tlkCrc = 0; bool first = true; unsigned superSubIndex=0; bool fileCrc = false, rowCrc = false; for (;;) { Owned<IDistributedFilePart> part = f->getPart(width); if (checkTLKConsistency) { unsigned _tlkCrc; if (part->getCrc(_tlkCrc)) fileCrc = true; else if (part->queryAttributes().hasProp("@crc")) // NB: key "@crc" is not a crc on the file, but data within. { _tlkCrc = part->queryAttributes().getPropInt("@crc"); rowCrc = true; } else if (part->queryAttributes().hasProp("@tlkCrc")) // backward compat. { _tlkCrc = part->queryAttributes().getPropInt("@tlkCrc"); rowCrc = true; } else { if (rowCrc || fileCrc) { checkTLKConsistency = false; Owned<IException> e = MakeActivityWarning(&container, 0, "Cannot validate that tlks in superfile %s match, some crc attributes are missing", super->queryLogicalName()); queryJobChannel().fireException(e); } } if (rowCrc && fileCrc) { checkTLKConsistency = false; Owned<IException> e = MakeActivityWarning(&container, 0, "Cannot validate that tlks in superfile %s match, due to mixed crc types.", super->queryLogicalName()); queryJobChannel().fireException(e); } if (checkTLKConsistency) { if (first) { tlkCrc = _tlkCrc; first = false; } else if (tlkCrc != _tlkCrc) throw MakeActivityException(this, 0, "Sorted output on super files comprising of non coparitioned sub keys is not supported (TLK's do not match)"); } } if (!nofilter) { Owned<IKeyIndex> keyIndex; unsigned copy; for (copy=0; copy<part->numCopies(); copy++) { RemoteFilename rfn; OwnedIFile ifile = createIFile(part->getFilename(rfn,copy)); if (ifile->exists()) { StringBuffer remotePath; rfn.getRemotePath(remotePath); unsigned crc = 0; part->getCrc(crc); keyIndex.setown(createKeyIndex(remotePath.str(), crc, false, false)); break; } } if (!keyIndex) throw MakeThorException(TE_FileNotFound, "Top level key part does not exist, for key: %s", index->queryLogicalName()); unsigned fixedSize = indexBaseHelper->queryDiskRecordSize()->querySerializedDiskMeta()->getFixedSize(); // used only if fixed Owned <IKeyManager> tlk = createLocalKeyManager(keyIndex, fixedSize, NULL); indexBaseHelper->createSegmentMonitors(tlk); tlk->finishSegmentMonitors(); tlk->reset(); while (tlk->lookup(false)) { if (tlk->queryFpos()) performPartLookup.replace(true, (aindex_t)(super?super->numSubFiles(true)*(tlk->queryFpos()-1)+superSubIndex:tlk->queryFpos()-1)); } } if (!super||!iter->next()) break; superSubIndex++; f = &iter->query(); if (width != f->numParts()-1) throw MakeActivityException(this, 0, "Super key %s, with mixture of sub key width are not supported.", f->queryLogicalName()); } }
virtual void process() override { ActPrintLog("INDEXWRITE: Start"); init(); IRowStream *stream = inputStream; ThorDataLinkMetaInfo info; input->getMetaInfo(info); outRowAllocator.setown(getRowAllocator(helper->queryDiskRecordSize())); start(); if (refactor) { assertex(isLocal); if (active) { unsigned targetWidth = partDesc->queryOwner().numParts()-(buildTlk?1:0); assertex(0 == container.queryJob().querySlaves() % targetWidth); unsigned partsPerNode = container.queryJob().querySlaves() / targetWidth; unsigned myPart = queryJobChannel().queryMyRank(); IArrayOf<IRowStream> streams; streams.append(*LINK(stream)); --partsPerNode; // Should this be merging 1,11,21,31 etc. unsigned p=0; unsigned fromPart = targetWidth+1 + (partsPerNode * (myPart-1)); for (; p<partsPerNode; p++) { streams.append(*createRowStreamFromNode(*this, fromPart++, queryJobChannel().queryJobComm(), mpTag, abortSoon)); } ICompare *icompare = helper->queryCompare(); assertex(icompare); Owned<IRowLinkCounter> linkCounter = new CThorRowLinkCounter; myInputStream.setown(createRowStreamMerger(streams.ordinality(), streams.getArray(), icompare, false, linkCounter)); stream = myInputStream; } else // serve nodes, creating merged parts rowServer.setown(createRowServer(this, stream, queryJobChannel().queryJobComm(), mpTag)); } processed = THORDATALINK_STARTED; // single part key support // has to serially pull all data fron nodes 2-N // nodes 2-N, could/should start pushing some data (as it's supposed to be small) to cut down on serial nature. unsigned node = queryJobChannel().queryMyRank(); if (singlePartKey) { if (1 == node) { try { open(*partDesc, false, helper->queryDiskRecordSize()->isVariableSize()); for (;;) { OwnedConstThorRow row = inputStream->ungroupedNextRow(); if (!row) break; if (abortSoon) return; processRow(row); } unsigned node = 2; while (node <= container.queryJob().querySlaves()) { Linked<IOutputRowDeserializer> deserializer = ::queryRowDeserializer(input); CMessageBuffer mb; Owned<ISerialStream> stream = createMemoryBufferSerialStream(mb); CThorStreamDeserializerSource rowSource; rowSource.setStream(stream); bool successSR; for (;;) { { BooleanOnOff tf(receivingTag2); successSR = queryJobChannel().queryJobComm().sendRecv(mb, node, mpTag2); } if (successSR) { if (rowSource.eos()) break; Linked<IEngineRowAllocator> allocator = ::queryRowAllocator(input); do { RtlDynamicRowBuilder rowBuilder(allocator); size32_t sz = deserializer->deserialize(rowBuilder, rowSource); OwnedConstThorRow fRow = rowBuilder.finalizeRowClear(sz); processRow(fRow); } while (!rowSource.eos()); } } node++; } } catch (CATCHALL) { close(*partDesc, partCrc, true); throw; } close(*partDesc, partCrc, true); stop(); } else { CMessageBuffer mb; CMemoryRowSerializer mbs(mb); Linked<IOutputRowSerializer> serializer = ::queryRowSerializer(input); for (;;) { BooleanOnOff tf(receivingTag2); if (queryJobChannel().queryJobComm().recv(mb, 1, mpTag2)) // node 1 asking for more.. { if (abortSoon) break; mb.clear(); do { OwnedConstThorRow row = inputStream->ungroupedNextRow(); if (!row) break; serializer->serialize(mbs, (const byte *)row.get()); } while (mb.length() < SINGLEPART_KEY_TRANSFER_SIZE); // NB: at least one row if (!queryJobChannel().queryJobComm().reply(mb)) throw MakeThorException(0, "Failed to send index data to node 1, from node %d", node); if (0 == mb.length()) break; } } } } else { if (!refactor || active) { try { StringBuffer partFname; getPartFilename(*partDesc, 0, partFname); ActPrintLog("INDEXWRITE: process: handling fname : %s", partFname.str()); open(*partDesc, false, helper->queryDiskRecordSize()->isVariableSize()); ActPrintLog("INDEXWRITE: write"); BooleanOnOff tf(receiving); if (!refactor || !active) receiving = false; do { OwnedConstThorRow row = inputStream->ungroupedNextRow(); if (!row) break; processRow(row); } while (!abortSoon); ActPrintLog("INDEXWRITE: write level 0 complete"); } catch (CATCHALL) { close(*partDesc, partCrc, isLocal && !buildTlk && 1 == node); throw; } close(*partDesc, partCrc, isLocal && !buildTlk && 1 == node); stop(); ActPrintLog("INDEXWRITE: Wrote %" RCPF "d records", processed & THORDATALINK_COUNT_MASK); if (buildTlk) { ActPrintLog("INDEXWRITE: sending rows"); NodeInfoArray tlkRows; CMessageBuffer msg; if (firstNode()) { if (processed & THORDATALINK_COUNT_MASK) { if (enableTlkPart0) tlkRows.append(* new CNodeInfo(0, firstRow.get(), firstRowSize, totalCount)); tlkRows.append(* new CNodeInfo(1, lastRow.get(), lastRowSize, totalCount)); } } else { if (processed & THORDATALINK_COUNT_MASK) { CNodeInfo row(queryJobChannel().queryMyRank(), lastRow.get(), lastRowSize, totalCount); row.serialize(msg); } queryJobChannel().queryJobComm().send(msg, 1, mpTag); } if (firstNode()) { ActPrintLog("INDEXWRITE: Waiting on tlk to complete"); // JCSMORE if refactor==true, is rowsToReceive here right?? unsigned rowsToReceive = (refactor ? (tlkDesc->queryOwner().numParts()-1) : container.queryJob().querySlaves()) -1; // -1 'cos got my own in array already ActPrintLog("INDEXWRITE: will wait for info from %d slaves before writing TLK", rowsToReceive); while (rowsToReceive--) { msg.clear(); receiveMsg(msg, RANK_ALL, mpTag); // NH->JCS RANK_ALL_OTHER not supported for recv if (abortSoon) return; if (msg.length()) { CNodeInfo *ni = new CNodeInfo(); ni->deserialize(msg); tlkRows.append(*ni); } } tlkRows.sort(CNodeInfo::compare); StringBuffer path; getPartFilename(*tlkDesc, 0, path); ActPrintLog("INDEXWRITE: creating toplevel key file : %s", path.str()); try { open(*tlkDesc, true, helper->queryDiskRecordSize()->isVariableSize()); if (tlkRows.length()) { CNodeInfo &lastNode = tlkRows.item(tlkRows.length()-1); memset(lastNode.value, 0xff, lastNode.size); } ForEachItemIn(idx, tlkRows) { CNodeInfo &info = tlkRows.item(idx); builder->processKeyData((char *)info.value, info.pos, info.size); } close(*tlkDesc, tlkCrc, true); } catch (CATCHALL) { abortSoon = true; close(*tlkDesc, tlkCrc, true); removeFiles(*partDesc); throw; } } } else if (!isLocal && firstNode())