CWriteMasterBase::CWriteMasterBase(CMasterGraphElement *info)
    : CMasterActivity(info), diskStats(info->queryJob(), diskWriteRemoteStatistics)
{
    // When replication is asynchronous (the default), the "replicated done"
    // state is published later; a synchronous setup can publish it straight away.
    publishReplicatedDone = !globals->getPropBool("@replicateAsync", true);
    targetOffset = 0;
    diskHelperBase = (IHThorDiskWriteArg *)queryHelper();
    // Per-slave progress tracker used to aggregate replication progress.
    replicateProgress.setown(new ProgressInfo(queryJob()));
}
// Start of index-write processing. Sets up the input according to the
// partitioning mode:
//  - refactor:      the key has a different width to the cluster; 'active'
//                   nodes merge their own rows with rows streamed from the
//                   serving nodes, the others serve their rows via a row server.
//  - singlePartKey: all rows are funnelled to node 1, which writes the single
//                   part; nodes 2..N serve their rows on request (mpTag2).
//  - otherwise:     each node writes its own part directly.
void process()
{
    ActPrintLog("INDEXWRITE: Start");
    ThorDataLinkMetaInfo info;
    inputs.item(0)->getMetaInfo(info);
    outRowAllocator.set(queryJob().getRowAllocator(helper->queryDiskRecordSize(), container.queryId()));
    if (refactor)
    {
        assertex(isLocal);
        // Buffer the local input so it can be consumed while remote streams are merged/served.
        input.setown(createDataLinkSmartBuffer(this, inputs.item(0), INDEXWRITE_SMART_BUFFER_SIZE, true, false, RCUNBOUND, this, false, &container.queryJob().queryIDiskUsage()));
        startInput(input);
        if (active)
        {
            // This node produces a target part: merge its own stream with the
            // streams pulled from the serving nodes assigned to it.
            unsigned targetWidth = partDesc->queryOwner().numParts()-(buildTlk?1:0);
            assertex(0 == container.queryJob().querySlaves() % targetWidth); // cluster width must be a multiple of target width
            unsigned partsPerNode = container.queryJob().querySlaves() / targetWidth;
            unsigned myPart = container.queryJob().queryMyRank();
            IArrayOf<IRowStream> streams;
            streams.append(*LINK(input)); // local stream first
            --partsPerNode; // remaining streams come from other nodes
            // Should this be merging 1,11,21,31 etc.
            unsigned p=0;
            unsigned fromPart = targetWidth+1 + (partsPerNode * (myPart-1));
            for (; p<partsPerNode; p++)
            {
                streams.append(*createRowStreamFromNode(*this, fromPart++, container.queryJob().queryJobComm(), mpTag, abortSoon));
            }
            ICompare *icompare = helper->queryCompare();
            assertex(icompare); // streams must be mergeable in key order
            Owned<IRowLinkCounter> linkCounter = new CThorRowLinkCounter;
            input.setown(createRowStreamToDataLinkAdapter(inputs.item(0), createRowStreamMerger(streams.ordinality(), streams.getArray(), icompare, false, linkCounter)));
        }
        else // serve nodes, creating merged parts
            rowServer.setown(createRowServer(this, input, container.queryJob().queryJobComm(), mpTag));
    }
    else if (singlePartKey)
    {
        input.setown(createDataLinkSmartBuffer(this, inputs.item(0), INDEXWRITE_SMART_BUFFER_SIZE, true, false, RCUNBOUND, this, false, &container.queryJob().queryIDiskUsage()));
        startInput(input);
    }
    else
    {
        input.set(inputs.item(0));
        startInput(input);
    }
    processed = THORDATALINK_STARTED;

    // single part key support
    // has to serially pull all data from nodes 2-N
    // nodes 2-N, could/should start pushing some data (as it's supposed to be small) to cut down on serial nature.
    unsigned node = container.queryJob().queryMyRank();
    if (singlePartKey)
    {
        if (1 == node)
        {
            // Node 1 writes the single part: first its own rows, then each
            // other node's rows, requested and received serially over mpTag2.
            try
            {
                open(*partDesc, false, helper->queryDiskRecordSize()->isVariableSize());
                loop
                {
                    OwnedConstThorRow row = input->ungroupedNextRow();
                    if (!row)
                        break;
                    if (abortSoon)
                        return;
                    processRow(row);
                }
                unsigned node = 2; // NB: shadows outer 'node' (==1 here); iterates source slaves 2..N
                while (node <= container.queryJob().querySlaves())
                {
                    Linked<IOutputRowDeserializer> deserializer = ::queryRowDeserializer(input);
                    CMessageBuffer mb;
                    Owned<ISerialStream> stream = createMemoryBufferSerialStream(mb);
                    CThorStreamDeserializerSource rowSource;
                    rowSource.setStream(stream);
                    bool successSR;
                    loop
                    {
                        {
                            // Scope marks that we are blocked in a receive on mpTag2.
                            BooleanOnOff tf(receivingTag2);
                            // Ask slave 'node' for its next batch; the reply lands in mb.
                            successSR = container.queryJob().queryJobComm().sendRecv(mb, node, mpTag2);
                        }
                        if (successSR)
                        {
                            if (rowSource.eos())
                                break; // empty reply => that node has no more rows
                            Linked<IEngineRowAllocator> allocator = ::queryRowAllocator(input);
                            do
                            {
                                // Deserialize and write each row in the received batch.
                                RtlDynamicRowBuilder rowBuilder(allocator);
                                size32_t sz = deserializer->deserialize(rowBuilder, rowSource);
                                OwnedConstThorRow fRow = rowBuilder.finalizeRowClear(sz);
                                processRow(fRow);
                            }
                            while (!rowSource.eos());
                        }
                    }
                    node++;
                }
            }
            catch (CATCHALL)
            {
                close(*partDesc, partCrc, true); // ensure the part file is closed before rethrowing
                throw;
            }
            close(*partDesc, partCrc, true);
            doStopInput();
        }
        else
        {
            // Nodes 2..N: serve local rows to node 1 on demand, batching up to
            // SINGLEPART_KEY_TRANSFER_SIZE bytes per reply.
            CMessageBuffer mb;
            CMemoryRowSerializer mbs(mb);
            Linked<IOutputRowSerializer> serializer = ::queryRowSerializer(input);
            loop
            {
                BooleanOnOff tf(receivingTag2);
                if (container.queryJob().queryJobComm().recv(mb, 1, mpTag2)) // node 1 asking for more..
                {
                    if (abortSoon)
                        break;
                    mb.clear();
                    do
                    {
                        OwnedConstThorRow row = input->ungroupedNextRow();
                        if (!row)
                            break;
                        serializer->serialize(mbs, (const byte *)row.get());
                    }
                    while (mb.length() < SINGLEPART_KEY_TRANSFER_SIZE); // NB: at least one row
                    if (!container.queryJob().queryJobComm().reply(mb))
                        throw MakeThorException(0, "Failed to send index data to node 1, from node %d", node);
                    if (0 == mb.length())
                        break; // empty reply sent => all local rows consumed
                }
            }
        }
    }
CDiskReadMasterBase::CDiskReadMasterBase(CMasterGraphElement *info)
    : CMasterActivity(info), diskStats(info->queryJob(), diskReadRemoteStatistics)
{
    // Per-slave input progress tracker; no hash distributor until one is needed.
    inputProgress.setown(new ProgressInfo(queryJob()));
    hash = NULL;
}
void CWriteMasterBase::publish() { if (published) return; published = true; if (!(diskHelperBase->getFlags() & (TDXtemporary|TDXjobtemp))) updateActivityResult(container.queryJob().queryWorkUnit(), diskHelperBase->getFlags(), diskHelperBase->getSequence(), fileName, recordsProcessed); IPropertyTree &props = fileDesc->queryProperties(); props.setPropInt64("@recordCount", recordsProcessed); if (0 == (diskHelperBase->getFlags() & TDXtemporary) || container.queryJob().queryUseCheckpoints()) { if (0 != (diskHelperBase->getFlags() & TDWexpires)) setExpiryTime(props, diskHelperBase->getExpiryDays()); if (TDWupdate & diskHelperBase->getFlags()) { unsigned eclCRC; unsigned __int64 totalCRC; diskHelperBase->getUpdateCRCs(eclCRC, totalCRC); props.setPropInt("@eclCRC", eclCRC); props.setPropInt64("@totalCRC", totalCRC); } } container.queryTempHandler()->registerFile(fileName, container.queryOwner().queryGraphId(), diskHelperBase->getTempUsageCount(), TDXtemporary & diskHelperBase->getFlags(), getDiskOutputKind(diskHelperBase->getFlags()), &clusters); if (!dlfn.isExternal()) { bool temporary = 0 != (diskHelperBase->getFlags()&TDXtemporary); if (!temporary && (queryJob().querySlaves() < fileDesc->numParts())) { // create empty parts for a fileDesc being published that is larger than this clusters size32_t recordSize = 0; IOutputMetaData *diskRowMeta = diskHelperBase->queryDiskRecordSize()->querySerializedDiskMeta(); if (diskRowMeta->isFixedSize() && (TAKdiskwrite == container.getKind())) { recordSize = diskRowMeta->getMinRecordSize(); if (0 != (diskHelperBase->getFlags() & TDXgrouped)) recordSize += 1; } unsigned compMethod = COMPRESS_METHOD_LZW; // rowdiff used if recordSize > 0, else fallback to compMethod if (getOptBool(THOROPT_COMP_FORCELZW, false)) { recordSize = 0; // by default if fixed length (recordSize set), row diff compression is used. This forces compMethod. 
compMethod = COMPRESS_METHOD_LZW; } else if (getOptBool(THOROPT_COMP_FORCEFLZ, false)) compMethod = COMPRESS_METHOD_FASTLZ; else if (getOptBool(THOROPT_COMP_FORCELZ4, false)) compMethod = COMPRESS_METHOD_LZ4; bool blockCompressed; bool compressed = fileDesc->isCompressed(&blockCompressed); for (unsigned clusterIdx=0; clusterIdx<fileDesc->numClusters(); clusterIdx++) { StringBuffer clusterName; fileDesc->getClusterGroupName(clusterIdx, clusterName, &queryNamedGroupStore()); PROGLOG("Creating blank parts for file '%s', cluster '%s'", fileName.get(), clusterName.str()); unsigned p=0; while (p<fileDesc->numParts()) { if (p == targetOffset) p += queryJob().querySlaves(); IPartDescriptor *partDesc = fileDesc->queryPart(p); CDateTime createTime, modifiedTime; for (unsigned c=0; c<partDesc->numCopies(); c++) { RemoteFilename rfn; partDesc->getFilename(c, rfn); StringBuffer path; rfn.getPath(path); try { ensureDirectoryForFile(path.str()); OwnedIFile iFile = createIFile(path.str()); Owned<IFileIO> iFileIO; if (compressed) iFileIO.setown(createCompressedFileWriter(iFile, recordSize, false, true, NULL, compMethod)); else iFileIO.setown(iFile->open(IFOcreate)); dbgassertex(iFileIO.get()); iFileIO.clear(); // ensure copies have matching datestamps, as they would do normally (backupnode expects it) if (partDesc->numCopies() > 1) { if (0 == c) iFile->getTime(&createTime, &modifiedTime, NULL); else iFile->setTime(&createTime, &modifiedTime, NULL); } } catch (IException *e) { if (0 == c) throw; Owned<IThorException> e2 = MakeThorException(e); e->Release(); e2->setAction(tea_warning); queryJob().fireException(e2); } } partDesc->queryProperties().setPropInt64("@size", 0); p++; } clusterIdx++; } } queryThorFileManager().publish(container.queryJob(), fileName, *fileDesc, NULL); } }