// IThorDataLink methods
    virtual void start()
    {
        ActivityTimer s(totalCycles, timeActivities);
        input.setown(createDataLinkSmartBuffer(this,inputs.item(0),PULL_SMART_BUFFER_SIZE,true,false,RCUNBOUND,NULL,false,&container.queryJob().queryIDiskUsage()));
        startInput(input);
        dataLinkStart();
    }
    void start()
    {
        ActivityTimer s(totalCycles, timeActivities, NULL);
        count = 0;
        eof = nextPut = false;
        inrowif.set(::queryRowInterfaces(inputs.item(0)));
        if (global) // only want lookahead if global (hence serial)
            input.setown(createDataLinkSmartBuffer(this, inputs.item(0),ITERATE_SMART_BUFFER_SIZE,isSmartBufferSpillNeeded(this),false,RCUNBOUND,NULL,false,&container.queryJob().queryIDiskUsage())); // only allow spill if input can stall
        else
            input.set(inputs.item(0));
        try
        { 
            startInput(input); 
        }
        catch (IException *e)
        {
            ActPrintLog(e,"ITERATE");
            throw;
        }
        dataLinkStart("ITERATOR", container.queryId());

    }
void CDiskWriteSlaveActivityBase::open()
{
    if (dlfn.isExternal() && !firstNode())
    {
        input.setown(createDataLinkSmartBuffer(this, inputs.item(0), PROCESS_SMART_BUFFER_SIZE, isSmartBufferSpillNeeded(this), grouped, RCUNBOUND, NULL, false, &container.queryJob().queryIDiskUsage()));
        startInput(input);
        if (!rfsQueryParallel)
        {
            ActPrintLog("Blocked, waiting for previous part to complete write");
            CMessageBuffer msg;
            if (!receiveMsg(msg, container.queryJob().queryMyRank()-1, mpTag))
                return;
            rowcount_t prevRows;
            msg.read(prevRows);
            msg.read(tempExternalName); // reuse temp filename, last node will rename
            ActPrintLog("Previous write row count = %"RCPF"d", prevRows);
        }
    }
    else
    {
        input.set(inputs.item(0));
        startInput(input);
    }
    processed = THORDATALINK_STARTED;

    bool extend = 0 != (diskHelperBase->getFlags() & TDWextend);
    if (extend)
        ActPrintLog("Extending file %s", fName.get());
    size32_t exclsz = 0;
    calcFileCrc = true;

    bool external = dlfn.isExternal();
    bool query = dlfn.isQuery();
    if (query && compress)
        UNIMPLEMENTED;

    bool direct = query || (external && !firstNode());
    bool rename = !external || (!query && lastNode());
    Owned<IFileIO> iFileIO = createMultipleWrite(this, *partDesc, exclsz, compress, extend||(external&&!query), ecomp, this, direct, rename, &abortSoon, (external&&!query) ? &tempExternalName : NULL);

    if (compress)
    {
        ActPrintLog("Performing row compression on output file: %s", fName.get());
        calcFileCrc = false;
    }
    Owned<IFileIOStream> stream;
    if (wantRaw())
    {
        outraw.setown(createBufferedIOStream(iFileIO));
        stream.set(outraw);
    }
    else
    {
        stream.setown(createIOStream(iFileIO));
        out.setown(createRowWriter(stream,::queryRowSerializer(input),::queryRowAllocator(input),grouped,calcFileCrc,false)); // flushed by close
    }
    CDfsLogicalFileName dlfn;
    dlfn.set(logicalFilename);
    if (extend || (external && !query))
        stream->seek(0,IFSend);
    ActPrintLog("Created output stream for %s", fName.get());
}
    void process()
    {
        ActPrintLog("INDEXWRITE: Start");

        ThorDataLinkMetaInfo info;
        inputs.item(0)->getMetaInfo(info);
        outRowAllocator.set(createThorRowAllocator(helper->queryDiskRecordSize(), container.queryId()));
        if (refactor)
        {
            assertex(isLocal);
            input.setown(createDataLinkSmartBuffer(this, inputs.item(0), INDEXWRITE_SMART_BUFFER_SIZE, true, false, RCUNBOUND, this, false, &container.queryJob().queryIDiskUsage())); 
            startInput(input);

            if (active)
            {
                unsigned targetWidth = partDesc->queryOwner().numParts()-(buildTlk?1:0);
                assertex(0 == container.queryJob().querySlaves() % targetWidth);
                unsigned partsPerNode = container.queryJob().querySlaves() / targetWidth;
                unsigned myPart = container.queryJob().queryMyRank();

                IArrayOf<IRowStream> streams;
                streams.append(*LINK(input));
                --partsPerNode;

 // Should this be merging 1,11,21,31 etc.
                unsigned p=0;
                unsigned fromPart = targetWidth+1 + (partsPerNode * (myPart-1));
                for (; p<partsPerNode; p++)
                {
                    streams.append(*createRowStreamFromNode(*this, fromPart++, container.queryJob().queryJobComm(), mpTag, abortSoon));
                }
                ICompare *icompare = helper->queryCompare();
                assertex(icompare);
                Owned<IRowLinkCounter> linkCounter = new CThorRowLinkCounter;
                input.setown(createRowStreamToDataLinkAdapter(inputs.item(0), createRowStreamMerger(streams.ordinality(), streams.getArray(), icompare, false, linkCounter)));
            }
            else // serve nodes, creating merged parts
                rowServer.setown(createRowServer(this, input, container.queryJob().queryJobComm(), mpTag));
        }
        else if (singlePartKey)
        {
            input.setown(createDataLinkSmartBuffer(this, inputs.item(0), INDEXWRITE_SMART_BUFFER_SIZE, true, false, RCUNBOUND, this, false, &container.queryJob().queryIDiskUsage())); 
            startInput(input);
        }
        else {
            input.set(inputs.item(0));
            startInput(input);
        }
        processed = THORDATALINK_STARTED;

        // single part key support
        // has to serially pull all data fron nodes 2-N
        // nodes 2-N, could/should start pushing some data (as it's supposed to be small) to cut down on serial nature.
        unsigned node = container.queryJob().queryMyRank();
        if (singlePartKey)
        {
            if (1 == node)
            {
                try
                {
                    open(*partDesc, false, helper->queryDiskRecordSize()->isVariableSize());
                    loop
                    {
                        OwnedConstThorRow row = input->ungroupedNextRow();
                        if (!row)
                            break;
                        if (abortSoon) return;
                        processRow(row);
                    }

                    unsigned node = 2;
                    while (node <= container.queryJob().querySlaves())
                    {
                        Linked<IOutputRowDeserializer> deserializer = ::queryRowDeserializer(input);
                        CMessageBuffer mb;
                        Owned<ISerialStream> stream = createMemoryBufferSerialStream(mb);
                        CThorStreamDeserializerSource rowSource;
                        rowSource.setStream(stream);
                        bool successSR;
                        loop
                        {
                            {
                                BooleanOnOff tf(receivingTag2);
                                successSR = container.queryJob().queryJobComm().sendRecv(mb, node, mpTag2);
                            }
                            if (successSR)
                            {
                                if (rowSource.eos())
                                    break;
                                Linked<IEngineRowAllocator> allocator = ::queryRowAllocator(input);
                                do
                                {
                                    RtlDynamicRowBuilder rowBuilder(allocator);
                                    size32_t sz = deserializer->deserialize(rowBuilder, rowSource);
                                    OwnedConstThorRow fRow = rowBuilder.finalizeRowClear(sz);
                                    processRow(fRow);
                                }
                                while (!rowSource.eos());
                            }
                        }
                        node++;
                    }
                }
                catch (CATCHALL)
                {
                    close(*partDesc, partCrc, true);
                    throw;
                }
                close(*partDesc, partCrc, true);
                doStopInput();
            }
            else
            {
                CMessageBuffer mb;
                CMemoryRowSerializer mbs(mb);
                Linked<IOutputRowSerializer> serializer = ::queryRowSerializer(input);
                loop
                {
                    BooleanOnOff tf(receivingTag2);
                    if (container.queryJob().queryJobComm().recv(mb, 1, mpTag2)) // node 1 asking for more..
                    {
                        if (abortSoon) break;
                        mb.clear();
                        do
                        {
                            OwnedConstThorRow row = input->ungroupedNextRow();
                            if (!row) break;
                            serializer->serialize(mbs, (const byte *)row.get());
                        } while (mb.length() < SINGLEPART_KEY_TRANSFER_SIZE); // NB: at least one row
                        if (!container.queryJob().queryJobComm().reply(mb))
                            throw MakeThorException(0, "Failed to send index data to node 1, from node %d", node);
                        if (0 == mb.length())
                            break;
                    }
                }
            }
        }
void CDiskWriteSlaveActivityBase::open()
{
    if (dlfn.isExternal() && !firstNode())
    {
        input.setown(createDataLinkSmartBuffer(this, inputs.item(0), PROCESS_SMART_BUFFER_SIZE, isSmartBufferSpillNeeded(this), grouped, RCUNBOUND, NULL, false, &container.queryJob().queryIDiskUsage()));
        startInput(input);
        if (!rfsQueryParallel)
        {
            ActPrintLog("Blocked, waiting for previous part to complete write");
            CMessageBuffer msg;
            if (!receiveMsg(msg, queryJobChannel().queryMyRank()-1, mpTag))
                return;
            rowcount_t prevRows;
            msg.read(prevRows);
            msg.read(tempExternalName); // reuse temp filename, last node will rename
            ActPrintLog("Previous write row count = %" RCPF "d", prevRows);
        }
    }
    else
    {
        input.set(inputs.item(0));
        startInput(input);
    }
    processed = THORDATALINK_STARTED;

    bool extend = 0 != (diskHelperBase->getFlags() & TDWextend);
    if (extend)
        ActPrintLog("Extending file %s", fName.get());

    /* Fixed length record size is used when outputting compressed stream to determine run-length compression vs default LZW compression.
     * NB: only for FLAT files, not CSV or XML
     */
    size32_t diskRowMinSz = 0;
    IOutputMetaData *diskRowMeta = diskHelperBase->queryDiskRecordSize()->querySerializedDiskMeta();
    if (diskRowMeta->isFixedSize() && (TAKdiskwrite == container.getKind()))
    {
        diskRowMinSz = diskRowMeta->getMinRecordSize();
        if (grouped)
            diskRowMinSz += 1;
    }

    calcFileCrc = true;

    bool external = dlfn.isExternal();
    bool query = dlfn.isQuery();
    if (query && compress)
        UNIMPLEMENTED;

    unsigned twFlags = external ? TW_External : 0;
    if (query || (external && !firstNode()))
        twFlags |= TW_Direct;
    if (!external || (!query && lastNode()))
        twFlags |= TW_RenameToPrimary;
    if (extend||(external&&!query))
        twFlags |= TW_Extend;

    Owned<IFileIO> iFileIO = createMultipleWrite(this, *partDesc, diskRowMinSz, twFlags, compress, ecomp, this, &abortSoon, (external&&!query) ? &tempExternalName : NULL);

    if (compress)
    {
        ActPrintLog("Performing row compression on output file: %s", fName.get());
        // NB: block compressed output has implicit crc of 0, no need to calculate in row  writer.
        calcFileCrc = false;
    }
    Owned<IFileIOStream> stream;
    if (wantRaw())
    {
        outraw.setown(createBufferedIOStream(iFileIO));
        stream.set(outraw);
    }
    else
    {
        stream.setown(createIOStream(iFileIO));
        unsigned rwFlags = 0;
        if (grouped)
            rwFlags |= rw_grouped;
        if (calcFileCrc)
            rwFlags |= rw_crc;
        out.setown(createRowWriter(stream, ::queryRowInterfaces(input), rwFlags));
    }
    if (extend || (external && !query))
        stream->seek(0,IFSend);
    ActPrintLog("Created output stream for %s", fName.get());
}