virtual void init() { CSortBaseActivityMaster::init(); IHThorSortArg *helper = (IHThorSortArg *)queryHelper(); IHThorAlgorithm *algo = static_cast<IHThorAlgorithm *>(helper->selectInterface(TAIalgorithm_1)); OwnedRoxieString algoname(algo->getAlgorithm()); unsigned flags = algo->getAlgorithmFlags(); if (algoname && (0 != stricmp(algoname, "quicksort"))) { Owned<IException> e = MakeActivityException(this, 0, "Ignoring, unsupported sort order algorithm '%s'", algoname.get()); reportExceptionToWorkunit(container.queryJob().queryWorkUnit(), e); } OwnedRoxieString cosortlogname(helper->getSortedFilename()); if (cosortlogname&&*cosortlogname) { Owned<IDistributedFile> coSortFile = queryThorFileManager().lookup(container.queryJob(), cosortlogname); addReadFile(coSortFile); Owned<IFileDescriptor> fileDesc = coSortFile->getFileDescriptor(); unsigned o; for (o=0; o<fileDesc->numParts(); o++) { Owned<IPartDescriptor> partDesc = fileDesc->getPart(o); if (cosortfilenames.length()) cosortfilenames.append("|"); // JCSMORE - picking the primary here, means no automatic use of backup copy, could use RMF's possibly. getPartFilename(*partDesc, 0, cosortfilenames); } } }
// Best-effort removal of the file behind copy 0 of 'partDesc'.
// Any failure is logged and swallowed; nothing is propagated to the caller.
void removeFiles(IPartDescriptor &partDesc)
{
    StringBuffer primaryPath;
    getPartFilename(partDesc, 0, primaryPath);
    Owned<IFile> target = createIFile(primaryPath.str());
    try
    {
        target->remove();
    }
    catch (IException *removeErr)
    {
        // log with the exception detail, then drop our reference to it
        ActPrintLog(removeErr, "Failed to remove file: %s", primaryPath.str());
        removeErr->Release();
    }
    catch (CATCHALL)
    {
        ActPrintLog("Failed to remove: %s", primaryPath.str());
    }
}
void CDiskWriteSlaveActivityBase::process() { calcFileCrc = false; uncompressedBytesWritten = 0; replicateDone = 0; StringBuffer tmpStr; fName.set(getPartFilename(*partDesc, 0, tmpStr).str()); if (diskHelperBase->getFlags() & TDXtemporary && !container.queryJob().queryUseCheckpoints()) container.queryTempHandler()->registerFile(fName, container.queryOwner().queryGraphId(), usageCount, true); try { ActPrintLog("handling fname : %s", fName.get()); try { open(); assertex(out||outraw); write(); } catch (IException *) { abortSoon = true; try { close(); } catch (IException *e) { EXCLOG(e, "close()"); e->Release(); } throw; } catch (CATCHALL) { abortSoon = true; try { close(); } catch (IException *e) { EXCLOG(e, "close()"); e->Release(); } throw; } close(); } catch (IException *) { calcFileCrc = false; throw; } catch(CATCHALL) { calcFileCrc = false; throw; } unsigned crc = compress?~0:fileCRC.get(); ActPrintLog("Wrote %" RCPF "d records%s", processed & THORDATALINK_COUNT_MASK, calcFileCrc?StringBuffer(", crc=0x").appendf("%X", crc).str() : ""); }
// Finishes the current key builder (if any) and releases it together with the metadata.
//
//  partDesc - part being closed; used for log messages and for removeFiles() on abort
//  crc      - passed (by address) to builder->finish()
//  addMeta  - when true, and metadata is present, the metadata is handed to finish() as well
//
// An exception raised by finish() sets abortSoon and is held back until the builder and
// metadata have been cleared and, if abortSoon is set, the part's files removed; it is then
// rethrown. An IException raised while clearing is logged and discarded.
void close(IPartDescriptor &partDesc, unsigned &crc, bool addMeta=false)
{
    StringBuffer partFname;
    getPartFilename(partDesc, 0, partFname);

    Owned<IException> pending; // exception from finish(), deferred until cleanup is done
    try
    {
        if (builder)
        {
            const bool withMeta = addMeta && metadata;
            if (withMeta)
                builder->finish(metadata, &crc);
            else
                builder->finish(&crc);
        }
    }
    catch (IException *finishErr)
    {
        ActPrintLog(finishErr, "Error closing file: %s", partFname.str());
        abortSoon = true;
        pending.setown(finishErr);
    }
    catch (CATCHALL)
    {
        abortSoon = true;
        pending.setown(MakeActivityException(this, 0, "INDEXWRITE: Error closing file: %s - unknown exception", partFname.str()));
    }

    try
    {
        metadata.clear();
        builder.clear();
    }
    catch (IException *clearErr)
    {
        ActPrintLog(clearErr, "Error closing file: %s", partFname.str());
        clearErr->Release();
    }

    if (abortSoon)
        removeFiles(partDesc);
    if (pending)
        throw LINK(pending); // 'pending' releases its own reference on unwind, so throw a linked one
}
// Creates the output stream for 'partDesc' and a key builder writing to it.
//
//  partDesc   - part to create
//  isTopLevel - when true the builder is created with a start value of 0 instead of totalCount
//  isVariable - adds HTREE_VARSIZE to the builder flags
//
// Builder flags start from COL_PREFIX; compression flags are derived from the helper's
// flags and HTREE_FULLSORT_KEY is added when the index is not local. The node size comes
// from the "_nodeSize" metadata property when metadata exists, otherwise NODESIZE.
void open(IPartDescriptor &partDesc, bool isTopLevel, bool isVariable)
{
    StringBuffer partFname;
    getPartFilename(partDesc, 0, partFname);

    bool compressOutput = false;
    OwnedIFileIO iFileIO = createMultipleWrite(this, partDesc, 0, TW_RenameToPrimary, compressOutput, NULL, this, &abortSoon);
    Owned<IFileIOStream> keyStream = createBufferedIOStream(iFileIO);
    ActPrintLog("INDEXWRITE: created fixed output stream %s", partFname.str());

    unsigned builderFlags = COL_PREFIX;
    if (TIWrowcompress & helper->getFlags())
        builderFlags |= HTREE_COMPRESSED_KEY|HTREE_QUICK_COMPRESSED_KEY;
    else if (0 == (TIWnolzwcompress & helper->getFlags()))
        builderFlags |= HTREE_COMPRESSED_KEY;
    if (!isLocal)
        builderFlags |= HTREE_FULLSORT_KEY;
    if (isVariable)
        builderFlags |= HTREE_VARSIZE;

    buildUserMetadata(metadata);
    buildLayoutMetadata(metadata);

    unsigned nodeSize = NODESIZE;
    if (metadata)
        nodeSize = metadata->getPropInt("_nodeSize", NODESIZE);

    builder.setown(createKeyBuilder(keyStream, builderFlags, maxDiskRecordSize, nodeSize, helper->getKeyedSize(), isTopLevel ? 0 : totalCount));
}
virtual void process() { ActPrintLog("process"); CMasterActivity::process(); IHThorSortArg *helper = (IHThorSortArg *)queryHelper(); StringBuffer skewV; double skewError; container.queryJob().getWorkUnitValue("overrideSkewError", skewV); if (skewV.length()) skewError = atof(skewV.str()); else { skewError = helper->getSkew(); if (!skewError) { container.queryJob().getWorkUnitValue("defaultSkewError", skewV.clear()); if (skewV.length()) skewError = atof(skewV.str()); } } container.queryJob().getWorkUnitValue("defaultSkewWarning", skewV.clear()); double defaultSkewWarning = skewV.length() ? atof(skewV.str()) : 0; double skewWarning = defaultSkewWarning; unsigned __int64 skewThreshold = container.queryJob().getWorkUnitValueInt("overrideSkewThreshold", 0); if (!skewThreshold) { skewThreshold = helper->getThreshold(); if (!skewThreshold) skewThreshold = container.queryJob().getWorkUnitValueInt("defaultSkewThreshold", 0); } StringBuffer cosortfilenames; const char *cosortlogname = helper->getSortedFilename(); if (cosortlogname&&*cosortlogname) { Owned<IDistributedFile> file = queryThorFileManager().lookup(container.queryJob(), cosortlogname); Owned<IFileDescriptor> fileDesc = file->getFileDescriptor(); queryThorFileManager().noteFileRead(container.queryJob(), file); unsigned o; for (o=0; o<fileDesc->numParts(); o++) { Owned<IPartDescriptor> partDesc = fileDesc->getPart(o); if (cosortfilenames.length()) cosortfilenames.append("|"); // JCSMORE - picking the primary here, means no automatic use of backup copy, could use RMF's possibly. 
getPartFilename(*partDesc, 0, cosortfilenames); } } Owned<IRowInterfaces> rowif = createRowInterfaces(container.queryInput(0)->queryHelper()->queryOutputMeta(),queryActivityId(),queryCodeContext()); Owned<IRowInterfaces> auxrowif = createRowInterfaces(helper->querySortedRecordSize(),queryActivityId(),queryCodeContext()); try { imaster->SortSetup(rowif,helper->queryCompare(),helper->querySerialize(),cosortfilenames.length()!=0,true,cosortfilenames.toCharArray(),auxrowif); if (barrier->wait(false)) { // local sort complete size32_t maxdeviance=globals->getPropInt("@sort_max_deviance", 10*1024*1024); if (!imaster->Sort(skewThreshold,skewWarning,skewError,maxdeviance,true,false,false,(unsigned)globals->getPropInt("@smallSortThreshold"))) { Owned<IThorException> e = MakeActivityException(this, TE_SortFailedSkewExceeded,"SORT failed, skew exceeded"); fireException(e); } barrier->wait(false); // merge complete } imaster->SortDone(); } catch (IException *e) { ActPrintLog(e, "WARNING: exception during sort"); throw; } ::Release(imaster); ActPrintLog("process exit"); }
static void _doReplicate(CActivityBase *activity, IPartDescriptor &partDesc, ICopyFileProgress *iProgress) { StringBuffer primaryName; getPartFilename(partDesc, 0, primaryName);; RemoteFilename rfn; IFileDescriptor &fileDesc = partDesc.queryOwner(); unsigned copies = partDesc.numCopies(); unsigned c=1; for (; c<copies; c++) { unsigned replicateCopy; unsigned clusterNum = partDesc.copyClusterNum(c, &replicateCopy); rfn.clear(); partDesc.getFilename(c, rfn); StringBuffer dstName; rfn.getPath(dstName); assertex(dstName.length()); if (replicateCopy>0 ) { try { queryThor().queryBackup().backup(dstName.str(), primaryName.str()); } catch (IException *e) { Owned<IThorException> re = MakeActivityWarning(activity, e, "Failed to create replicate file '%s'", dstName.str()); e->Release(); activity->fireException(re); } } else // another primary { ActPrintLog(activity, "Copying to primary %s", dstName.str()); StringBuffer tmpName(dstName.str()); tmpName.append(".tmp"); OwnedIFile tmpIFile = createIFile(tmpName.str()); OwnedIFile srcFile = createIFile(primaryName.str()); CFIPScope fipScope(tmpName.str()); try { try { ensureDirectoryForFile(dstName.str()); ::copyFile(tmpIFile, srcFile, 0x100000, iProgress); } catch (IException *e) { IThorException *re = MakeActivityException(activity, e, "Failed to copy to tmp file '%s' from source file '%s'", tmpIFile->queryFilename(), srcFile->queryFilename()); e->Release(); throw re; } try { OwnedIFile dstIFile = createIFile(dstName.str()); dstIFile->remove(); tmpIFile->rename(pathTail(dstName.str())); } catch (IException *e) { IThorException *re = ThorWrapException(e, "Failed to rename '%s' to '%s'", tmpName.str(), dstName.str()); e->Release(); throw re; } } catch (IException *) { try { tmpIFile->remove(); } catch (IException *e) { ActPrintLog(&activity->queryContainer(), e, NULL); e->Release(); } throw; } } } }
virtual void process() override { ActPrintLog("INDEXWRITE: Start"); init(); IRowStream *stream = inputStream; ThorDataLinkMetaInfo info; input->getMetaInfo(info); outRowAllocator.setown(getRowAllocator(helper->queryDiskRecordSize())); start(); if (refactor) { assertex(isLocal); if (active) { unsigned targetWidth = partDesc->queryOwner().numParts()-(buildTlk?1:0); assertex(0 == container.queryJob().querySlaves() % targetWidth); unsigned partsPerNode = container.queryJob().querySlaves() / targetWidth; unsigned myPart = queryJobChannel().queryMyRank(); IArrayOf<IRowStream> streams; streams.append(*LINK(stream)); --partsPerNode; // Should this be merging 1,11,21,31 etc. unsigned p=0; unsigned fromPart = targetWidth+1 + (partsPerNode * (myPart-1)); for (; p<partsPerNode; p++) { streams.append(*createRowStreamFromNode(*this, fromPart++, queryJobChannel().queryJobComm(), mpTag, abortSoon)); } ICompare *icompare = helper->queryCompare(); assertex(icompare); Owned<IRowLinkCounter> linkCounter = new CThorRowLinkCounter; myInputStream.setown(createRowStreamMerger(streams.ordinality(), streams.getArray(), icompare, false, linkCounter)); stream = myInputStream; } else // serve nodes, creating merged parts rowServer.setown(createRowServer(this, stream, queryJobChannel().queryJobComm(), mpTag)); } processed = THORDATALINK_STARTED; // single part key support // has to serially pull all data fron nodes 2-N // nodes 2-N, could/should start pushing some data (as it's supposed to be small) to cut down on serial nature. 
unsigned node = queryJobChannel().queryMyRank(); if (singlePartKey) { if (1 == node) { try { open(*partDesc, false, helper->queryDiskRecordSize()->isVariableSize()); for (;;) { OwnedConstThorRow row = inputStream->ungroupedNextRow(); if (!row) break; if (abortSoon) return; processRow(row); } unsigned node = 2; while (node <= container.queryJob().querySlaves()) { Linked<IOutputRowDeserializer> deserializer = ::queryRowDeserializer(input); CMessageBuffer mb; Owned<ISerialStream> stream = createMemoryBufferSerialStream(mb); CThorStreamDeserializerSource rowSource; rowSource.setStream(stream); bool successSR; for (;;) { { BooleanOnOff tf(receivingTag2); successSR = queryJobChannel().queryJobComm().sendRecv(mb, node, mpTag2); } if (successSR) { if (rowSource.eos()) break; Linked<IEngineRowAllocator> allocator = ::queryRowAllocator(input); do { RtlDynamicRowBuilder rowBuilder(allocator); size32_t sz = deserializer->deserialize(rowBuilder, rowSource); OwnedConstThorRow fRow = rowBuilder.finalizeRowClear(sz); processRow(fRow); } while (!rowSource.eos()); } } node++; } } catch (CATCHALL) { close(*partDesc, partCrc, true); throw; } close(*partDesc, partCrc, true); stop(); } else { CMessageBuffer mb; CMemoryRowSerializer mbs(mb); Linked<IOutputRowSerializer> serializer = ::queryRowSerializer(input); for (;;) { BooleanOnOff tf(receivingTag2); if (queryJobChannel().queryJobComm().recv(mb, 1, mpTag2)) // node 1 asking for more.. 
{ if (abortSoon) break; mb.clear(); do { OwnedConstThorRow row = inputStream->ungroupedNextRow(); if (!row) break; serializer->serialize(mbs, (const byte *)row.get()); } while (mb.length() < SINGLEPART_KEY_TRANSFER_SIZE); // NB: at least one row if (!queryJobChannel().queryJobComm().reply(mb)) throw MakeThorException(0, "Failed to send index data to node 1, from node %d", node); if (0 == mb.length()) break; } } } } else { if (!refactor || active) { try { StringBuffer partFname; getPartFilename(*partDesc, 0, partFname); ActPrintLog("INDEXWRITE: process: handling fname : %s", partFname.str()); open(*partDesc, false, helper->queryDiskRecordSize()->isVariableSize()); ActPrintLog("INDEXWRITE: write"); BooleanOnOff tf(receiving); if (!refactor || !active) receiving = false; do { OwnedConstThorRow row = inputStream->ungroupedNextRow(); if (!row) break; processRow(row); } while (!abortSoon); ActPrintLog("INDEXWRITE: write level 0 complete"); } catch (CATCHALL) { close(*partDesc, partCrc, isLocal && !buildTlk && 1 == node); throw; } close(*partDesc, partCrc, isLocal && !buildTlk && 1 == node); stop(); ActPrintLog("INDEXWRITE: Wrote %" RCPF "d records", processed & THORDATALINK_COUNT_MASK); if (buildTlk) { ActPrintLog("INDEXWRITE: sending rows"); NodeInfoArray tlkRows; CMessageBuffer msg; if (firstNode()) { if (processed & THORDATALINK_COUNT_MASK) { if (enableTlkPart0) tlkRows.append(* new CNodeInfo(0, firstRow.get(), firstRowSize, totalCount)); tlkRows.append(* new CNodeInfo(1, lastRow.get(), lastRowSize, totalCount)); } } else { if (processed & THORDATALINK_COUNT_MASK) { CNodeInfo row(queryJobChannel().queryMyRank(), lastRow.get(), lastRowSize, totalCount); row.serialize(msg); } queryJobChannel().queryJobComm().send(msg, 1, mpTag); } if (firstNode()) { ActPrintLog("INDEXWRITE: Waiting on tlk to complete"); // JCSMORE if refactor==true, is rowsToReceive here right?? unsigned rowsToReceive = (refactor ? 
(tlkDesc->queryOwner().numParts()-1) : container.queryJob().querySlaves()) -1; // -1 'cos got my own in array already ActPrintLog("INDEXWRITE: will wait for info from %d slaves before writing TLK", rowsToReceive); while (rowsToReceive--) { msg.clear(); receiveMsg(msg, RANK_ALL, mpTag); // NH->JCS RANK_ALL_OTHER not supported for recv if (abortSoon) return; if (msg.length()) { CNodeInfo *ni = new CNodeInfo(); ni->deserialize(msg); tlkRows.append(*ni); } } tlkRows.sort(CNodeInfo::compare); StringBuffer path; getPartFilename(*tlkDesc, 0, path); ActPrintLog("INDEXWRITE: creating toplevel key file : %s", path.str()); try { open(*tlkDesc, true, helper->queryDiskRecordSize()->isVariableSize()); if (tlkRows.length()) { CNodeInfo &lastNode = tlkRows.item(tlkRows.length()-1); memset(lastNode.value, 0xff, lastNode.size); } ForEachItemIn(idx, tlkRows) { CNodeInfo &info = tlkRows.item(idx); builder->processKeyData((char *)info.value, info.pos, info.size); } close(*tlkDesc, tlkCrc, true); } catch (CATCHALL) { abortSoon = true; close(*tlkDesc, tlkCrc, true); removeFiles(*partDesc); throw; } } } else if (!isLocal && firstNode())