// IThorDataLink methods virtual void start() { ActivityTimer s(totalCycles, timeActivities); input.setown(createDataLinkSmartBuffer(this,inputs.item(0),PULL_SMART_BUFFER_SIZE,true,false,RCUNBOUND,NULL,false,&container.queryJob().queryIDiskUsage())); startInput(input); dataLinkStart(); }
void start() { ActivityTimer s(totalCycles, timeActivities, NULL); count = 0; eof = nextPut = false; inrowif.set(::queryRowInterfaces(inputs.item(0))); if (global) // only want lookahead if global (hence serial) input.setown(createDataLinkSmartBuffer(this, inputs.item(0),ITERATE_SMART_BUFFER_SIZE,isSmartBufferSpillNeeded(this),false,RCUNBOUND,NULL,false,&container.queryJob().queryIDiskUsage())); // only allow spill if input can stall else input.set(inputs.item(0)); try { startInput(input); } catch (IException *e) { ActPrintLog(e,"ITERATE"); throw; } dataLinkStart("ITERATOR", container.queryId()); }
void CDiskWriteSlaveActivityBase::open() { if (dlfn.isExternal() && !firstNode()) { input.setown(createDataLinkSmartBuffer(this, inputs.item(0), PROCESS_SMART_BUFFER_SIZE, isSmartBufferSpillNeeded(this), grouped, RCUNBOUND, NULL, false, &container.queryJob().queryIDiskUsage())); startInput(input); if (!rfsQueryParallel) { ActPrintLog("Blocked, waiting for previous part to complete write"); CMessageBuffer msg; if (!receiveMsg(msg, container.queryJob().queryMyRank()-1, mpTag)) return; rowcount_t prevRows; msg.read(prevRows); msg.read(tempExternalName); // reuse temp filename, last node will rename ActPrintLog("Previous write row count = %"RCPF"d", prevRows); } } else { input.set(inputs.item(0)); startInput(input); } processed = THORDATALINK_STARTED; bool extend = 0 != (diskHelperBase->getFlags() & TDWextend); if (extend) ActPrintLog("Extending file %s", fName.get()); size32_t exclsz = 0; calcFileCrc = true; bool external = dlfn.isExternal(); bool query = dlfn.isQuery(); if (query && compress) UNIMPLEMENTED; bool direct = query || (external && !firstNode()); bool rename = !external || (!query && lastNode()); Owned<IFileIO> iFileIO = createMultipleWrite(this, *partDesc, exclsz, compress, extend||(external&&!query), ecomp, this, direct, rename, &abortSoon, (external&&!query) ? &tempExternalName : NULL); if (compress) { ActPrintLog("Performing row compression on output file: %s", fName.get()); calcFileCrc = false; } Owned<IFileIOStream> stream; if (wantRaw()) { outraw.setown(createBufferedIOStream(iFileIO)); stream.set(outraw); } else { stream.setown(createIOStream(iFileIO)); out.setown(createRowWriter(stream,::queryRowSerializer(input),::queryRowAllocator(input),grouped,calcFileCrc,false)); // flushed by close } CDfsLogicalFileName dlfn; dlfn.set(logicalFilename); if (extend || (external && !query)) stream->seek(0,IFSend); ActPrintLog("Created output stream for %s", fName.get()); }
void process() { ActPrintLog("INDEXWRITE: Start"); ThorDataLinkMetaInfo info; inputs.item(0)->getMetaInfo(info); outRowAllocator.set(createThorRowAllocator(helper->queryDiskRecordSize(), container.queryId())); if (refactor) { assertex(isLocal); input.setown(createDataLinkSmartBuffer(this, inputs.item(0), INDEXWRITE_SMART_BUFFER_SIZE, true, false, RCUNBOUND, this, false, &container.queryJob().queryIDiskUsage())); startInput(input); if (active) { unsigned targetWidth = partDesc->queryOwner().numParts()-(buildTlk?1:0); assertex(0 == container.queryJob().querySlaves() % targetWidth); unsigned partsPerNode = container.queryJob().querySlaves() / targetWidth; unsigned myPart = container.queryJob().queryMyRank(); IArrayOf<IRowStream> streams; streams.append(*LINK(input)); --partsPerNode; // Should this be merging 1,11,21,31 etc. unsigned p=0; unsigned fromPart = targetWidth+1 + (partsPerNode * (myPart-1)); for (; p<partsPerNode; p++) { streams.append(*createRowStreamFromNode(*this, fromPart++, container.queryJob().queryJobComm(), mpTag, abortSoon)); } ICompare *icompare = helper->queryCompare(); assertex(icompare); Owned<IRowLinkCounter> linkCounter = new CThorRowLinkCounter; input.setown(createRowStreamToDataLinkAdapter(inputs.item(0), createRowStreamMerger(streams.ordinality(), streams.getArray(), icompare, false, linkCounter))); } else // serve nodes, creating merged parts rowServer.setown(createRowServer(this, input, container.queryJob().queryJobComm(), mpTag)); } else if (singlePartKey) { input.setown(createDataLinkSmartBuffer(this, inputs.item(0), INDEXWRITE_SMART_BUFFER_SIZE, true, false, RCUNBOUND, this, false, &container.queryJob().queryIDiskUsage())); startInput(input); } else { input.set(inputs.item(0)); startInput(input); } processed = THORDATALINK_STARTED; // single part key support // has to serially pull all data fron nodes 2-N // nodes 2-N, could/should start pushing some data (as it's supposed to be small) to cut down on serial nature. unsigned node = container.queryJob().queryMyRank(); if (singlePartKey) { if (1 == node) { try { open(*partDesc, false, helper->queryDiskRecordSize()->isVariableSize()); loop { OwnedConstThorRow row = input->ungroupedNextRow(); if (!row) break; if (abortSoon) return; processRow(row); } unsigned node = 2; while (node <= container.queryJob().querySlaves()) { Linked<IOutputRowDeserializer> deserializer = ::queryRowDeserializer(input); CMessageBuffer mb; Owned<ISerialStream> stream = createMemoryBufferSerialStream(mb); CThorStreamDeserializerSource rowSource; rowSource.setStream(stream); bool successSR; loop { { BooleanOnOff tf(receivingTag2); successSR = container.queryJob().queryJobComm().sendRecv(mb, node, mpTag2); } if (successSR) { if (rowSource.eos()) break; Linked<IEngineRowAllocator> allocator = ::queryRowAllocator(input); do { RtlDynamicRowBuilder rowBuilder(allocator); size32_t sz = deserializer->deserialize(rowBuilder, rowSource); OwnedConstThorRow fRow = rowBuilder.finalizeRowClear(sz); processRow(fRow); } while (!rowSource.eos()); } } node++; } } catch (CATCHALL) { close(*partDesc, partCrc, true); throw; } close(*partDesc, partCrc, true); doStopInput(); } else { CMessageBuffer mb; CMemoryRowSerializer mbs(mb); Linked<IOutputRowSerializer> serializer = ::queryRowSerializer(input); loop { BooleanOnOff tf(receivingTag2); if (container.queryJob().queryJobComm().recv(mb, 1, mpTag2)) // node 1 asking for more.. { if (abortSoon) break; mb.clear(); do { OwnedConstThorRow row = input->ungroupedNextRow(); if (!row) break; serializer->serialize(mbs, (const byte *)row.get()); } while (mb.length() < SINGLEPART_KEY_TRANSFER_SIZE); // NB: at least one row if (!container.queryJob().queryJobComm().reply(mb)) throw MakeThorException(0, "Failed to send index data to node 1, from node %d", node); if (0 == mb.length()) break; } } } }
void CDiskWriteSlaveActivityBase::open() { if (dlfn.isExternal() && !firstNode()) { input.setown(createDataLinkSmartBuffer(this, inputs.item(0), PROCESS_SMART_BUFFER_SIZE, isSmartBufferSpillNeeded(this), grouped, RCUNBOUND, NULL, false, &container.queryJob().queryIDiskUsage())); startInput(input); if (!rfsQueryParallel) { ActPrintLog("Blocked, waiting for previous part to complete write"); CMessageBuffer msg; if (!receiveMsg(msg, queryJobChannel().queryMyRank()-1, mpTag)) return; rowcount_t prevRows; msg.read(prevRows); msg.read(tempExternalName); // reuse temp filename, last node will rename ActPrintLog("Previous write row count = %" RCPF "d", prevRows); } } else { input.set(inputs.item(0)); startInput(input); } processed = THORDATALINK_STARTED; bool extend = 0 != (diskHelperBase->getFlags() & TDWextend); if (extend) ActPrintLog("Extending file %s", fName.get()); /* Fixed length record size is used when outputting compressed stream to determine run-length compression vs default LZW compression. * NB: only for FLAT files, not CSV or XML */ size32_t diskRowMinSz = 0; IOutputMetaData *diskRowMeta = diskHelperBase->queryDiskRecordSize()->querySerializedDiskMeta(); if (diskRowMeta->isFixedSize() && (TAKdiskwrite == container.getKind())) { diskRowMinSz = diskRowMeta->getMinRecordSize(); if (grouped) diskRowMinSz += 1; } calcFileCrc = true; bool external = dlfn.isExternal(); bool query = dlfn.isQuery(); if (query && compress) UNIMPLEMENTED; unsigned twFlags = external ? TW_External : 0; if (query || (external && !firstNode())) twFlags |= TW_Direct; if (!external || (!query && lastNode())) twFlags |= TW_RenameToPrimary; if (extend||(external&&!query)) twFlags |= TW_Extend; Owned<IFileIO> iFileIO = createMultipleWrite(this, *partDesc, diskRowMinSz, twFlags, compress, ecomp, this, &abortSoon, (external&&!query) ? &tempExternalName : NULL); if (compress) { ActPrintLog("Performing row compression on output file: %s", fName.get()); // NB: block compressed output has implicit crc of 0, no need to calculate in row writer. calcFileCrc = false; } Owned<IFileIOStream> stream; if (wantRaw()) { outraw.setown(createBufferedIOStream(iFileIO)); stream.set(outraw); } else { stream.setown(createIOStream(iFileIO)); unsigned rwFlags = 0; if (grouped) rwFlags |= rw_grouped; if (calcFileCrc) rwFlags |= rw_crc; out.setown(createRowWriter(stream, ::queryRowInterfaces(input), rwFlags)); } if (extend || (external && !query)) stream->seek(0,IFSend); ActPrintLog("Created output stream for %s", fName.get()); }