void sendRemainingHeaderLines()
 {
     if (!headerLines)
         return;
     unsigned which = sentHeaderLines->scanInvert(0, false);
     if (which < subFiles)
     {
         CMessageBuffer msgMb;
         bool someLeft=false;
         do
         {
             msgMb.append(which);
             unsigned &remaining = getHeaderLines(which);
             if (0 != remaining)
                 someLeft = true;
             msgMb.append(remaining);
             which = sentHeaderLines->scanInvert(which+1, false);
         }
         while (which < subFiles);
         if (someLeft)
             queryJobChannel().queryJobComm().send(msgMb, queryJobChannel().queryMyRank()+1, mpTag);
         else
             sendAllDone();
     }
 }
Exemple #2
0
 // Cross-checks what DFS has published for the file against what the generated
 // code expects:
 //  - grouping mismatch            -> warning, DFS information wins
 //  - fixed record size mismatch   -> throws TE_RecordSizeMismatch
 //  - compression requested by the code but file not published as compressed
 //                                 -> warning (only for rows >= MIN_ROWCOMPRESS_RECSIZE)
 //  - format CRC                   -> checkFormatCrc(), unless TDRnocrccheck is set
 virtual void validateFile(IDistributedFile *file)
 {
     IHThorDiskReadBaseArg *helper = (IHThorDiskReadBaseArg *)queryHelper();
     bool codeGenGrouped = 0 != (TDXgrouped & helper->getFlags());
     bool isGrouped = fileDesc->isGrouped();
     if (isGrouped != codeGenGrouped)
     {
         Owned<IException> e = MakeActivityWarning(&container, TE_GroupMismatch, "DFS and code generated group info. differs: DFS(%s), CodeGen(%s), using DFS info", isGrouped?"grouped":"ungrouped", codeGenGrouped?"grouped":"ungrouped");
         queryJobChannel().fireException(e);
     }
     IOutputMetaData *recordSize = helper->queryDiskRecordSize()->querySerializedDiskMeta();
     if (recordSize->isFixedSize()) // fixed size
     {
         const size32_t codedSz = recordSize->getMinRecordSize();
         // Read the published size once; 0 means "not published", so there is nothing to compare.
         size32_t publishedSz = fileDesc->queryProperties().getPropInt("@recordSize");
         if (0 != publishedSz)
         {
             if (isGrouped)
                 publishedSz--; // eog byte not to be included in this test.
             if (publishedSz != codedSz)
                 throw MakeThorException(TE_RecordSizeMismatch, "Published record size %u for file %s, does not match coded record size %u", publishedSz, fileName.get(), codedSz); // sizes are unsigned, hence %u
         }
         if (!fileDesc->isCompressed() && (TDXcompress & helper->getFlags()))
         {
             size32_t rowSz = codedSz;
             if (isGrouped)
                 rowSz++; // on-disk row carries the eog byte
             if (rowSz >= MIN_ROWCOMPRESS_RECSIZE)
             {
                 Owned<IException> e = MakeActivityWarning(&container, TE_CompressionMismatch, "Ignoring compression attribute on file '%s', which is not published as compressed in DFS", fileName.get());
                 queryJobChannel().fireException(e);
             }
         }
     }
     if (0 == (TDRnocrccheck & helper->getFlags()))
         checkFormatCrc(this, file, helper->getFormatCrc(), false);
 }
 // When the activity is global, reads the barrier tag from 'data' and creates the
 // barrier on it. Nothing is read or written otherwise; 'slaveData' is unused here.
 virtual void init(MemoryBuffer &data, MemoryBuffer &slaveData) override
 {
     if (!global)
         return;
     mptag_t tag = queryJobChannel().deserializeMPTag(data);
     barrier.setown(queryJobChannel().createBarrier(tag));
 }
 const void *getFirst() // for global, not called on 1st slave
 {
     CMessageBuffer msg;
     if (!queryJobChannel().queryJobComm().recv(msg, queryJobChannel().queryMyRank()-1, mpTag)) // previous node
         return NULL;
     msg.read(count);
     size32_t r = msg.remaining();
     OwnedConstThorRow firstRow;
     if (r)
         firstRow.deserialize(inrowif, r, msg.readDirect(r));
     return firstRow.getClear();
 }
    void sendTallies() // NB: not called on last node.
    {
#if THOR_TRACE_LEVEL >= 5
        StringBuffer s;
        unsigned idx=0;
        for (; idx<numSets; idx++)
            s.append("[").append(tallies[idx]).append("]");
        ActPrintLog("CHOOSESETS: Outgoing count = %s", s.str());
#endif
        CMessageBuffer msg;
        msg.append(numSets * sizeof(unsigned), tallies); 
        queryJobChannel().queryJobComm().send(msg, queryJobChannel().queryMyRank()+1, mpTag);
    }
    // Starts the activity: resets the end-of-group/end-of-file state, primes 'next'
    // with the first input row and, when rollover is enabled on any node but the
    // first, collects the leading group of rows and exposes it through a row server
    // on mpTag (the log text describes this as sending to the previous node).
    virtual void start() override
    {
        ActivityTimer s(totalCycles, timeActivities);
        ActPrintLog(rolloverEnabled ? "GROUP: is global" : "GROUP: is local");
        PARENT::start();
        eogNext = prevEog = eof = false;
        if (rolloverEnabled)
        {
            // the last node has no following node to roll over into
            useRollover = !lastNode();
#ifdef _TESTING
            ActPrintLog("Node number = %d, Total Nodes = %d", queryJobChannel().queryMyRank(), container.queryJob().querySlaves());
#endif
        }

        stream.set(inputStream);
        startLastGroup = getDataLinkGlobalCount();
        // prime the look-ahead row
        next.setown(getNext());

        if (rolloverEnabled && !firstNode())  // 1st node can have nothing to send
        {
            Owned<IThorRowCollector> collector = createThorRowCollector(*this, this, NULL, stableSort_none, rc_mixed, SPILL_PRIORITY_SPILLABLE_STREAM);
            Owned<IRowWriter> writer = collector->getWriter();
            if (next)
            {
                ActPrintLog("GROUP: Sending first group to previous node(%d)", queryJobChannel().queryMyRank()-1);
                // Copy rows into the collector until the group changes, input ends, or an abort is seen.
                for (;;)
                {
                    // the collector gets its own link; 'next' stays valid for the comparison below
                    writer->putRow(next.getLink());
                    if (abortSoon)
                        break; //always send group even when aborting
                    OwnedConstThorRow next2 = getNext();
                    if (!next2)
                    {
                        // input exhausted while still inside the first group
                        eof = true;
                        break;
                    }
                    else if (!helper->isSameGroup(next2, next))
                    {
                        // next2 starts a new group: keep it as the look-ahead row, it is not collected
                        next.setown(next2.getClear());
                        break;
                    }
                    next.setown(next2.getClear());
                }
            }
            // release the writer before asking the collector for its row count and stream
            writer.clear();
            ActPrintLog("GROUP: %" RCPF "d records to send", collector->numRows());
            Owned<IRowStream> strm = collector->getStream();
            rowServer.setown(createRowServer(this, strm, queryJobChannel().queryJobComm(), mpTag));
        }
    }
void CDiskWriteSlaveActivityBase::close()
{
    try
    {
        if (out) {
            uncompressedBytesWritten = out->getPosition();
            if (calcFileCrc) {
                if (diskHelperBase->getFlags() & TDWextend) {
                    assertex(!"TBD need to merge CRC");
                }   
                else
                    out->flush(&fileCRC);
            }
            else if (!abortSoon)
                out->flush();
            out.clear();
        }
        else if (outraw) {
            outraw->flush();
            uncompressedBytesWritten = outraw->tell();
            outraw.clear();
        }

        {
            CriticalBlock block(statsCs);
            mergeStats(fileStats, outputIO);
            outputIO.clear();
        }

        if (!rfsQueryParallel && dlfn.isExternal() && !lastNode())
        {
            rowcount_t rows = processed & THORDATALINK_COUNT_MASK;
            ActPrintLog("External write done, signalling next (row count = %" RCPF "d)", rows);
            CMessageBuffer msg;
            msg.append(rows);
            msg.append(tempExternalName);
            queryJobChannel().queryJobComm().send(msg, queryJobChannel().queryMyRank()+1, mpTag);
        }
    }
    catch (IException *e)
    { 
        ActPrintLogEx(&queryContainer(), e, thorlog_null, MCwarning, "Error closing file: %s", fName.get());
        abortSoon = true;
        removeFiles();
        throw e;
    }
    if (abortSoon)
        removeFiles();
}
 void sendHeaderLines(unsigned subFile, unsigned part)
 {
     if (0 == headerLinesRemaining[subFile])
     {
         if (sentHeaderLines->testSet(subFile))
             return;
         unsigned which = gotHeaderLines->scan(0, false);
         if (which == subFiles) // all received
         {
             bool someLeft=false;
             unsigned hL=0;
             for (; hL<subFiles; hL++)
             {
                 if (headerLinesRemaining[hL])
                 {
                     someLeft = true;
                     break;
                 }
             }
             if (!someLeft)
             {
                 sendAllDone();
                 return;
             }
         }
     }
     else
     {
         if (localLastPart[subFile] != part) // only ready to send if last local part
             return;
         if (sentHeaderLines->testSet(subFile))
             return;
     }
     CMessageBuffer msgMb;
     msgMb.append(subFile);
     msgMb.append(headerLinesRemaining[subFile]);
     // inform next slave about all subfiles I'm not dealing with.
     for (unsigned s=0; s<subFiles; s++)
     {
         if (NotFound == localLastPart[s])
         {
             sentHeaderLines->testSet(s);
             msgMb.append(s);
             msgMb.append(headerLinesRemaining[s]);
         }
     }
     queryJobChannel().queryJobComm().send(msgMb, queryJobChannel().queryMyRank()+1, mpTag);
 }
    void process()
    {
        CMessageBuffer msg;
        unsigned inputs = container.getInputs();
        unsigned slaves = container.queryJob().querySlaves();
        unsigned s;
        bool readSome=false, slaveReadSome;

        IntArray replyTags;
        for (s=0; s<slaves; s++)
            replyTags.append(0);
        while (inputs>1)
        {
            inputs--;
            for (s=0; s<slaves; s++)
            {
                rank_t sender;
                if (!receiveMsg(msg, RANK_ALL, replyTag, &sender)) return;

                replyTags.replace(msg.getReplyTag(), ((int)sender)-1);
                msg.read(slaveReadSome);
                if (slaveReadSome) readSome = true;
            }
            msg.clear().append(readSome);
            for (s=0; s<slaves; s++)
            {
                if (!queryJobChannel().queryJobComm().send(msg, ((rank_t) s+1), (mptag_t) replyTags.item(s), LONGTIMEOUT))
                    throw MakeActivityException(this, 0, "Failed to give result to slave");
            }
            if (readSome) // got some, have told slaves to ignore rest, so finish
                break;
        }
    }
Exemple #10
0
 virtual void init()
 {
     CMasterActivity::init();
     OwnedRoxieString fname(helper->getFileName());
     Owned<IDistributedFile> fetchFile = queryThorFileManager().lookup(container.queryJob(), fname, false, 0 != (helper->getFetchFlags() & FFdatafileoptional), true);
     if (fetchFile)
     {
         if (isFileKey(fetchFile))
             throw MakeActivityException(this, 0, "Attempting to read index as a flat file: %s", fname.get());
         Owned<IFileDescriptor> fileDesc = getConfiguredFileDescriptor(*fetchFile);
         void *ekey;
         size32_t ekeylen;
         helper->getFileEncryptKey(ekeylen,ekey);
         bool encrypted = fileDesc->queryProperties().getPropBool("@encrypted");
         if (0 != ekeylen)
         {
             memset(ekey,0,ekeylen);
             free(ekey);
             if (!encrypted)
             {
                 Owned<IException> e = MakeActivityWarning(&container, TE_EncryptionMismatch, "Ignoring encryption key provided as file '%s' was not published as encrypted", fetchFile->queryLogicalName());
                 queryJobChannel().fireException(e);
             }
         }
         else if (encrypted)
             throw MakeActivityException(this, 0, "File '%s' was published as encrypted but no encryption key provided", fetchFile->queryLogicalName());
         mapping.setown(getFileSlaveMaps(fetchFile->queryLogicalName(), *fileDesc, container.queryJob().queryUserDescriptor(), container.queryJob().querySlaveGroup(), container.queryLocalOrGrouped(), false, NULL, fetchFile->querySuperFile()));
         mapping->serializeFileOffsetMap(offsetMapMb);
         addReadFile(fetchFile);
     }
 }
 void putNext(const void *prev)
 {
     if (nextPut) return;
     nextPut = true;
     if (global && !lastNode())
     {
         CMessageBuffer msg;
         msg.append(count);
         if (prev)
         {
             CMemoryRowSerializer msz(msg);
             ::queryRowSerializer(input)->serialize(msz, (const byte *)prev);
         }
         if (!queryJobChannel().queryJobComm().send(msg, queryJobChannel().queryMyRank()+1, mpTag)) // to next
             return;
     }
 }
 // Returns the next row from the current stream. When that stream is exhausted and
 // rollover is still pending, switches (once) to a stream fed by the next rank and
 // returns its first row. Returns NULL when there is nothing more.
 const void *getNext()
 {
     if (const void *row = stream->ungroupedNextRow())
         return row;
     if (!useRollover)
         return NULL;

     useRollover = false; // only ever roll over once
     // JCSMORE will generate time out log messages, while waiting for next nodes group
     rank_t myNode = queryJobChannel().queryMyRank();
     nextNodeStream.setown(createRowStreamFromNode(*this, myNode+1, queryJobChannel().queryJobComm(), mpTag, abortSoon));
     stream.set(nextNodeStream);
     return stream->nextRow();
 }
 // Unless the activity is local/grouped, sends the raw 'counts' array
 // (numSets rowcount_t values) to rank 0. The 'count' argument is not used here.
 virtual void onInputFinished(rowcount_t count) override
 {
     if (!container.queryLocalOrGrouped())
     {
         CMessageBuffer totals;
         totals.append(numSets*sizeof(rowcount_t), counts);
         queryJobChannel().queryJobComm().send(totals, 0, mpTag);
     }
 }
 // Sends 'r' to rank 0, at most once per resultSent reset.
 void sendResult(rowcount_t r)
 {
     if (!resultSent)
     {
         resultSent = true;
         CMessageBuffer reply;
         reply.append(r);
         queryJobChannel().queryJobComm().send(reply, 0, mpTag);
     }
 }
 void sendResult(const void *row, IOutputRowSerializer *serializer, rank_t dst)
 {
     CMessageBuffer mb;
     DelayedSizeMarker sizeMark(mb);
     if (row&&hadElement) {
         CMemoryRowSerializer mbs(mb);
         serializer->serialize(mbs,(const byte *)row);
         sizeMark.write();
     }
     queryJobChannel().queryJobComm().send(mb, dst, mpTag);
 }
Exemple #16
0
 // Creates the sorter master and registers every slave with it, using the endpoint
 // each slave placed in its initialization data.
 virtual void preStart(size32_t parentExtractSz, const byte *parentExtract)
 {
     CSortBaseActivityMaster::preStart(parentExtractSz, parentExtract);
     ActPrintLog("preStart");
     imaster = CreateThorSorterMaster(this);
     unsigned slave = 0;
     while (slave < container.queryJob().querySlaves())
     {
         SocketEndpoint slaveEp;
         slaveEp.deserialize(queryInitializationData(slave)); // this is a bit of a Kludge until we get proper MP Thor
         imaster->AddSlave(&queryJobChannel().queryJobComm(), slave+1, slaveEp, mpTagRPC); // ranks are 1-based
         ++slave;
     }
 }
Exemple #17
0
 virtual void process()
 {
     if (totalCountKnown) return;
     if (container.queryLocalOrGrouped())
         return;
     totalCount = ::getCount(*this, container.queryJob().querySlaves(), stopAfter, mpTag);
     if (totalCount > stopAfter)
         totalCount = stopAfter;
     CMessageBuffer msg;
     msg.append(totalCount);
     if (!queryJobChannel().queryJobComm().send(msg, 1, mpTag, 5000))
         throw MakeThorException(0, "Failed to give result to slave");
 }
    void getTallies() // NB: not called on first node.
    {
        CMessageBuffer msg;
        if (!receiveMsg(msg, queryJobChannel().queryMyRank()-1, mpTag))
            return;
        memcpy(tallies, msg.readDirect(numSets*sizeof(unsigned)), numSets*sizeof(unsigned));
#if THOR_TRACE_LEVEL >= 5
        StringBuffer s;
        unsigned idx=0;
        for (; idx<numSets; idx++)
            s.append("[").append(tallies[idx]).append("]");
        ActPrintLog("CHOOSESETS: Incoming count = %s", s.str());
#endif
    }
Exemple #19
0
    // Combines the slaves' partial results via getAggregate() and, if a result row
    // comes back, serializes it and sends it to rank 1.
    // Throws if that send does not succeed within the 5000 timeout.
    virtual void process()
    {
        Owned<IThorRowInterfaces> rowIf = createThorRowInterfaces(queryRowManager(), helper->queryOutputMeta(), queryId(), queryCodeContext());
        OwnedConstThorRow result = getAggregate(*this, container.queryJob().querySlaves(), *rowIf, *helper, mpTag);
        if (!result)
            return; // nothing to hand back
        CMessageBuffer msg;
        CMemoryRowSerializer mbs(msg);
        rowIf->queryRowSerializer()->serialize(mbs, (const byte *)result.get());
        if (!queryJobChannel().queryJobComm().send(msg, 1, mpTag, 5000))
            throw MakeThorException(0, "Failed to give result to slave");
    }
    void process()
    {
        start();
        processed = 0;

        processed = THORDATALINK_STARTED;

        OwnedConstThorRow row = inputStream->ungroupedNextRow();
        CMessageBuffer mb;
        DelayedSizeMarker sizeMark(mb);
        if (row)
        {
            CMemoryRowSerializer msz(mb);
            ::queryRowSerializer(input)->serialize(msz,(const byte *)row.get());
            sizeMark.write();
            processed++;
        }
        queryJobChannel().queryJobComm().send(mb, 0, masterMpTag);
    }
Exemple #21
0
    void process()
    {
        CWorkUnitWriteMasterBase::process();

        unsigned nslaves = container.queryJob().querySlaves();

        CMessageBuffer mb;
        unsigned s=0;
        for (; s<nslaves; s++)
        {
            loop
            {
                if (!queryJobChannel().queryJobComm().send(mb, s+1, mpTag)) return;
                if (!receiveMsg(mb, s+1, mpTag)) return;
                if (0 == mb.length())
                    break;
                unsigned numGot;
                mb.read(numGot);
                unsigned l=mb.remaining();
                if (workunitWriteLimit && totalSize+resultData.length()+l > workunitWriteLimit)
                {
                    StringBuffer errMsg("Dataset too large to output to workunit (limit is set to ");
                    errMsg.append(workunitWriteLimit/0x100000).append(") megabytes, in result (");
                    if (resultName.length())
                        errMsg.append("name=").append(resultName);
                    else
                        errMsg.append("sequence=").append(resultSeq);
                    errMsg.append(")");
                    throw MakeThorException(TE_WorkUnitWriteLimitExceeded, "%s", errMsg.str());
                }
                resultData.append(l, mb.readDirect(l));
                mb.clear();
                numResults += numGot;

                if (-1 != flushThreshold && resultData.length() >= (unsigned)flushThreshold)
                    flushResults();
            }
        }
        flushResults(true);
    }
 // Sums the row counts reported by the slaves (one message each, in arrival order),
 // stopping early once the running total exceeds 'limit'.
 // Returns the total gathered so far, or 0 if a receive fails or an abort is seen.
 rowcount_t aggregateToLimit()
 {
     rowcount_t total = 0;
     unsigned slaves = container.queryJob().querySlaves();
     unsigned s;
     for (s=0; s<slaves; s++)
     {
         CMessageBuffer msg;
         rank_t sender;
         if (!receiveMsg(msg, RANK_ALL, mpTag, &sender))
             return 0;
         if (abortSoon)
             return 0;
         rowcount_t count;
         msg.read(count);
         total += count;
         if (total > limit)
             break; // already over the limit, remaining counts cannot change the outcome
     }
     return total;
 }
Exemple #23
0
    // Sums the row counts reported by the slaves (stopping early once the total
    // exceeds the helper's row limit), then acts on the activity kind:
    //  - TAKcreaterowlimit / TAKskiplimit: sends the total to RANK_ALL_OTHER
    //  - TAKlimit: calls helper->onLimitExceeded() when the total is over the limit
    // Returns without acting if a receive fails or an abort is seen.
    void process()
    {
        unsigned slaves = container.queryJob().querySlaves();
        IHThorLimitArg *helper = (IHThorLimitArg *)queryHelper();

        rowcount_t rowLimit = (rowcount_t)helper->getRowLimit();
        rowcount_t total = 0;
        while (slaves--)
        {
            CMessageBuffer mb;
            if (!receiveMsg(mb, RANK_ALL, mpTag, NULL))
                return;
            if (abortSoon)
                return;
            rowcount_t count;
            mb.read(count);
            total += count;
            if (total > rowLimit)
                break; // already over, no need to wait for the remaining counts
        }
        switch (container.getKind())
        {
            case TAKcreaterowlimit:
            case TAKskiplimit:
            {
                CMessageBuffer mb;
                mb.append(total);
                queryJobChannel().queryJobComm().send(mb, RANK_ALL_OTHER, mpTag);
                break;
            }
            case TAKlimit:
            {
                if (total > rowLimit)
                    helper->onLimitExceeded();
                break;
            }
        }
    }
Exemple #24
0
 // Reports the loop counter 'n' and the number of empty iterations to rank 0 when
 // the activity is not local/grouped and one of the reporting conditions holds.
 // For a global loop it then waits for the reply and returns the flag it carries.
 // Returns true in every case where no reply is awaited.
 // Returns false if the reply cannot be received, rather than reading the flag from
 // a buffer that was never filled.
 // NOTE(review): assumes callers treat false as "do not continue" - confirm.
 bool sendLoopingCount(unsigned n, unsigned emptyIterations)
 {
     if (!container.queryLocalOrGrouped())
     {
         if (global || (lastMaxEmpty && (0 == emptyIterations)) || (!lastMaxEmpty && (emptyIterations>maxEmptyLoopIterations)) || ((0 == n) && (0 == emptyIterations)))
         {
             CMessageBuffer msg; // inform master starting
             msg.append(n);
             msg.append(emptyIterations);
             queryJobChannel().queryJobComm().send(msg, 0, mpTag);
             if (!global)
             {
                 lastMaxEmpty = emptyIterations>maxEmptyLoopIterations;
                 return true;
             }
             if (!receiveMsg(msg, 0, mpTag))
                 return false; // no reply (e.g. receive cancelled): nothing valid to read
             bool ok;
             msg.read(ok);
             return ok;
         }
     }
     return true;
 }
// Aborts the activity and, for a serial external write on any node but the first,
// cancels the receive posted on the previous rank.
void CDiskWriteSlaveActivityBase::abort()
{
    ProcessSlaveActivity::abort();
    if (rfsQueryParallel || !dlfn.isExternal() || firstNode())
        return;
    cancelReceiveMsg(queryJobChannel().queryMyRank()-1, mpTag);
}
Exemple #26
0
 // Reads the RPC tag and then the barrier tag from 'data' (in that order), creates
 // the barrier, allocates NUMSLAVEPORTS ports, creates the sorter and writes this
 // slave's endpoint into 'slaveData'.
 virtual void init(MemoryBuffer &data, MemoryBuffer &slaveData) override
 {
     mpTagRPC = container.queryJobChannel().deserializeMPTag(data);
     mptag_t tagForBarrier = container.queryJobChannel().deserializeMPTag(data);
     barrier.setown(container.queryJobChannel().createBarrier(tagForBarrier));

     portbase = allocPort(NUMSLAVEPORTS);
     ActPrintLog("MSortSlaveActivity::init portbase = %d, mpTagRPC = %d",portbase,(int)mpTagRPC);
     server.setLocalHost(portbase);

     helper = (IHThorSortArg *)queryHelper();
     sorter.setown(CreateThorSorter(this, server,&container.queryJob().queryIDiskUsage(),&queryJobChannel().queryJobComm(),mpTagRPC));

     // endpoint goes back via slaveData
     server.serialize(slaveData);
 }
// Starts the input and creates the output stream/writer for this part.
// For an external target on any node but the first, the input is wrapped in a smart
// buffer and, unless rfsQueryParallel is set, this call blocks until the previous
// rank reports that its write is complete (returning early if that receive fails).
void CDiskWriteSlaveActivityBase::open()
{
    if (dlfn.isExternal() && !firstNode())
    {
        input.setown(createDataLinkSmartBuffer(this, inputs.item(0), PROCESS_SMART_BUFFER_SIZE, isSmartBufferSpillNeeded(this), grouped, RCUNBOUND, NULL, false, &container.queryJob().queryIDiskUsage()));
        startInput(input);
        if (!rfsQueryParallel)
        {
            ActPrintLog("Blocked, waiting for previous part to complete write");
            CMessageBuffer msg;
            if (!receiveMsg(msg, queryJobChannel().queryMyRank()-1, mpTag))
                return;
            // message layout: row count, then the temp file name
            rowcount_t prevRows;
            msg.read(prevRows);
            msg.read(tempExternalName); // reuse temp filename, last node will rename
            ActPrintLog("Previous write row count = %" RCPF "d", prevRows);
        }
    }
    else
    {
        input.set(inputs.item(0));
        startInput(input);
    }
    processed = THORDATALINK_STARTED;

    bool extend = 0 != (diskHelperBase->getFlags() & TDWextend);
    if (extend)
        ActPrintLog("Extending file %s", fName.get());

    /* Fixed length record size is used when outputting compressed stream to determine run-length compression vs default LZW compression.
     * NB: only for FLAT files, not CSV or XML
     */
    size32_t diskRowMinSz = 0;
    IOutputMetaData *diskRowMeta = diskHelperBase->queryDiskRecordSize()->querySerializedDiskMeta();
    if (diskRowMeta->isFixedSize() && (TAKdiskwrite == container.getKind()))
    {
        diskRowMinSz = diskRowMeta->getMinRecordSize();
        if (grouped)
            diskRowMinSz += 1;
    }

    // default; switched off below when the output is compressed
    calcFileCrc = true;

    bool external = dlfn.isExternal();
    bool query = dlfn.isQuery();
    if (query && compress)
        UNIMPLEMENTED;

    // Work out the flags for createMultipleWrite() from the target type and this node's position.
    unsigned twFlags = external ? TW_External : 0;
    if (query || (external && !firstNode()))
        twFlags |= TW_Direct;
    if (!external || (!query && lastNode()))
        twFlags |= TW_RenameToPrimary;
    if (extend||(external&&!query))
        twFlags |= TW_Extend;

    // the temp name is only passed through for external, non-query targets
    Owned<IFileIO> iFileIO = createMultipleWrite(this, *partDesc, diskRowMinSz, twFlags, compress, ecomp, this, &abortSoon, (external&&!query) ? &tempExternalName : NULL);

    if (compress)
    {
        ActPrintLog("Performing row compression on output file: %s", fName.get());
        // NB: block compressed output has implicit crc of 0, no need to calculate in row  writer.
        calcFileCrc = false;
    }
    Owned<IFileIOStream> stream;
    if (wantRaw())
    {
        // raw mode: write straight to a buffered stream, no row writer
        outraw.setown(createBufferedIOStream(iFileIO));
        stream.set(outraw);
    }
    else
    {
        stream.setown(createIOStream(iFileIO));
        unsigned rwFlags = 0;
        if (grouped)
            rwFlags |= rw_grouped;
        if (calcFileCrc)
            rwFlags |= rw_crc;
        out.setown(createRowWriter(stream, ::queryRowInterfaces(input), rwFlags));
    }
    // same condition as TW_Extend above: position at the end of the file
    if (extend || (external && !query))
        stream->seek(0,IFSend);
    ActPrintLog("Created output stream for %s", fName.get());
}
    virtual void process() override
    {
        ActPrintLog("INDEXWRITE: Start");
        init();

        IRowStream *stream = inputStream;
        ThorDataLinkMetaInfo info;
        input->getMetaInfo(info);
        outRowAllocator.setown(getRowAllocator(helper->queryDiskRecordSize()));
        start();
        if (refactor)
        {
            assertex(isLocal);
            if (active)
            {
                unsigned targetWidth = partDesc->queryOwner().numParts()-(buildTlk?1:0);
                assertex(0 == container.queryJob().querySlaves() % targetWidth);
                unsigned partsPerNode = container.queryJob().querySlaves() / targetWidth;
                unsigned myPart = queryJobChannel().queryMyRank();

                IArrayOf<IRowStream> streams;
                streams.append(*LINK(stream));
                --partsPerNode;

 // Should this be merging 1,11,21,31 etc.
                unsigned p=0;
                unsigned fromPart = targetWidth+1 + (partsPerNode * (myPart-1));
                for (; p<partsPerNode; p++)
                {
                    streams.append(*createRowStreamFromNode(*this, fromPart++, queryJobChannel().queryJobComm(), mpTag, abortSoon));
                }
                ICompare *icompare = helper->queryCompare();
                assertex(icompare);
                Owned<IRowLinkCounter> linkCounter = new CThorRowLinkCounter;
                myInputStream.setown(createRowStreamMerger(streams.ordinality(), streams.getArray(), icompare, false, linkCounter));
                stream = myInputStream;
            }
            else // serve nodes, creating merged parts
                rowServer.setown(createRowServer(this, stream, queryJobChannel().queryJobComm(), mpTag));
        }
        processed = THORDATALINK_STARTED;

        // single part key support
        // has to serially pull all data fron nodes 2-N
        // nodes 2-N, could/should start pushing some data (as it's supposed to be small) to cut down on serial nature.
        unsigned node = queryJobChannel().queryMyRank();
        if (singlePartKey)
        {
            if (1 == node)
            {
                try
                {
                    open(*partDesc, false, helper->queryDiskRecordSize()->isVariableSize());
                    loop
                    {
                        OwnedConstThorRow row = inputStream->ungroupedNextRow();
                        if (!row)
                            break;
                        if (abortSoon) return;
                        processRow(row);
                    }

                    unsigned node = 2;
                    while (node <= container.queryJob().querySlaves())
                    {
                        Linked<IOutputRowDeserializer> deserializer = ::queryRowDeserializer(input);
                        CMessageBuffer mb;
                        Owned<ISerialStream> stream = createMemoryBufferSerialStream(mb);
                        CThorStreamDeserializerSource rowSource;
                        rowSource.setStream(stream);
                        bool successSR;
                        loop
                        {
                            {
                                BooleanOnOff tf(receivingTag2);
                                successSR = queryJobChannel().queryJobComm().sendRecv(mb, node, mpTag2);
                            }
                            if (successSR)
                            {
                                if (rowSource.eos())
                                    break;
                                Linked<IEngineRowAllocator> allocator = ::queryRowAllocator(input);
                                do
                                {
                                    RtlDynamicRowBuilder rowBuilder(allocator);
                                    size32_t sz = deserializer->deserialize(rowBuilder, rowSource);
                                    OwnedConstThorRow fRow = rowBuilder.finalizeRowClear(sz);
                                    processRow(fRow);
                                }
                                while (!rowSource.eos());
                            }
                        }
                        node++;
                    }
                }
                catch (CATCHALL)
                {
                    close(*partDesc, partCrc, true);
                    throw;
                }
                close(*partDesc, partCrc, true);
                doStopInput();
            }
            else
            {
                CMessageBuffer mb;
                CMemoryRowSerializer mbs(mb);
                Linked<IOutputRowSerializer> serializer = ::queryRowSerializer(input);
                loop
                {
                    BooleanOnOff tf(receivingTag2);
                    if (queryJobChannel().queryJobComm().recv(mb, 1, mpTag2)) // node 1 asking for more..
                    {
                        if (abortSoon) break;
                        mb.clear();
                        do
                        {
                            OwnedConstThorRow row = inputStream->ungroupedNextRow();
                            if (!row) break;
                            serializer->serialize(mbs, (const byte *)row.get());
                        } while (mb.length() < SINGLEPART_KEY_TRANSFER_SIZE); // NB: at least one row
                        if (!queryJobChannel().queryJobComm().reply(mb))
                            throw MakeThorException(0, "Failed to send index data to node 1, from node %d", node);
                        if (0 == mb.length())
                            break;
                    }
                }
            }
        }
    // Prepares the per-part lookup flags for 'index' and, for sorted reads of a
    // non-local super key, verifies that the top level keys (TLKs) of all sub keys match.
    //
    // performPartLookup receives one entry per part of 'index' (TLK parts included,
    // although those entries are unused): all true when 'nofilter' is set, otherwise
    // all false, with entries switched to true for each part returned by a TLK lookup
    // driven by the helper's segment monitors.
    //
    // Throws if:
    //  - the TLK crcs of the sub keys differ while consistency checking is active,
    //  - no copy of a TLK part exists on disk (only checked when filtering),
    //  - the sub keys of a super key do not all have the same width.
    // Fires a warning (and stops checking consistency) if the crcs cannot be compared
    // because some parts lack crc information or the parts carry different crc kinds.
    void prepareKey(IDistributedFile *index)
    {
        IDistributedFile *f = index;
        IDistributedSuperFile *super = f->querySuperFile();

        unsigned nparts = f->numParts(); // includes tlks if any, but unused in array
        performPartLookup.ensure(nparts);

        // TLK consistency only matters for sorted output from a non-local super key
        bool checkTLKConsistency = (nullptr != super) && !localKey && (0 != (TIRsorted & indexBaseHelper->getFlags()));
        if (nofilter)
        {
            while (nparts--) performPartLookup.append(true);
            if (!checkTLKConsistency) return; // nothing else to do: all parts are read and no check is needed
        }
        else
        {
            while (nparts--) performPartLookup.append(false); // parts to perform lookup set later
        }

        Owned<IDistributedFileIterator> iter;
        if (super)
        {
            // walk the sub files, starting with the first one
            iter.setown(super->getSubFileIterator(true));
            verifyex(iter->first());
            f = &iter->query();
        }
        unsigned width = f->numParts();
        if (!localKey)
            --width; // exclude the trailing part, which is fetched below as the TLK part
        assertex(width);
        unsigned tlkCrc = 0;
        bool first = true;
        unsigned superSubIndex=0;
        bool fileCrc = false, rowCrc = false;
        bool missingCrc = false; // set once any inspected part had no crc information at all
        for (;;)
        {
            Owned<IDistributedFilePart> part = f->getPart(width);
            if (checkTLKConsistency)
            {
                // Initialised so that it is never read with an indeterminate value;
                // it is only meaningful when partHasCrc is true.
                unsigned _tlkCrc = 0;
                bool partHasCrc = true;
                if (part->getCrc(_tlkCrc))
                    fileCrc = true;
                else if (part->queryAttributes().hasProp("@crc")) // NB: key "@crc" is not a crc on the file, but data within.
                {
                    _tlkCrc = part->queryAttributes().getPropInt("@crc");
                    rowCrc = true;
                }
                else if (part->queryAttributes().hasProp("@tlkCrc")) // backward compat.
                {
                    _tlkCrc = part->queryAttributes().getPropInt("@tlkCrc");
                    rowCrc = true;
                }
                else
                {
                    partHasCrc = false;
                    missingCrc = true;
                }
                if (missingCrc && (rowCrc || fileCrc))
                {
                    // Some parts have a crc and some do not, regardless of the order in
                    // which they were encountered: the TLKs cannot be compared.
                    checkTLKConsistency = false;
                    Owned<IException> e = MakeActivityWarning(&container, 0, "Cannot validate that tlks in superfile %s match, some crc attributes are missing", super->queryLogicalName());
                    queryJobChannel().fireException(e);
                }
                else if (rowCrc && fileCrc)
                {
                    // A file crc and a row crc are different kinds of value, so comparing them is meaningless
                    checkTLKConsistency = false;
                    Owned<IException> e = MakeActivityWarning(&container, 0, "Cannot validate that tlks in superfile %s match, due to mixed crc types.", super->queryLogicalName());
                    queryJobChannel().fireException(e);
                }
                if (checkTLKConsistency && partHasCrc)
                {
                    if (first)
                    {
                        // remember the first crc seen; later ones must equal it
                        tlkCrc = _tlkCrc;
                        first = false;
                    }
                    else if (tlkCrc != _tlkCrc)
                        throw MakeActivityException(this, 0, "Sorted output on super files comprising of non coparitioned sub keys is not supported (TLK's do not match)");
                }
            }
            if (!nofilter)
            {
                // Open the first copy of the TLK part that exists on disk
                Owned<IKeyIndex> keyIndex;
                unsigned copy;
                for (copy=0; copy<part->numCopies(); copy++)
                {
                    RemoteFilename rfn;
                    OwnedIFile ifile = createIFile(part->getFilename(rfn,copy));
                    if (ifile->exists())
                    {
                        StringBuffer remotePath;
                        rfn.getRemotePath(remotePath);
                        unsigned crc = 0;
                        part->getCrc(crc);
                        keyIndex.setown(createKeyIndex(remotePath.str(), crc, false, false));
                        break;
                    }
                }
                if (!keyIndex)
                    throw MakeThorException(TE_FileNotFound, "Top level key part does not exist, for key: %s", index->queryLogicalName());

                unsigned fixedSize = indexBaseHelper->queryDiskRecordSize()->querySerializedDiskMeta()->getFixedSize(); // used only if fixed
                Owned <IKeyManager> tlk = createLocalKeyManager(keyIndex, fixedSize, NULL);
                indexBaseHelper->createSegmentMonitors(tlk);
                tlk->finishSegmentMonitors();
                tlk->reset();
                while (tlk->lookup(false))
                {
                    // A non-zero fpos is used as a 1-based part number. For a super key the
                    // flag index is (part * number of sub files + sub file index).
                    // NOTE(review): presumably matches how super key parts are laid out - confirm.
                    if (tlk->queryFpos())
                        performPartLookup.replace(true, (aindex_t)(super?super->numSubFiles(true)*(tlk->queryFpos()-1)+superSubIndex:tlk->queryFpos()-1));
                }
            }
            if (!super||!iter->next())
                break;
            superSubIndex++;
            f = &iter->query();
            // every sub key must have the same number of non-TLK parts as the first
            if (width != f->numParts()-1)
                throw MakeActivityException(this, 0, "Super key %s, with mixture of sub key width are not supported.", f->queryLogicalName());
        }
    }
Exemple #30
0
    virtual void process()
    {
        ActPrintLog("GlobalMergeActivityMaster::process");
        CMasterActivity::process();     
        IHThorMergeArg *helper = (IHThorMergeArg *)queryHelper();   
        Owned<IThorRowInterfaces> rowif = createRowInterfaces(helper->queryOutputMeta());
        CThorKeyArray sample(*this, rowif,helper->querySerialize(),helper->queryCompare(),helper->queryCompareKey(),helper->queryCompareRowKey());

        unsigned n = container.queryJob().querySlaves();
        mptag_t *replytags = new mptag_t[n];
        mptag_t *intertags = new mptag_t[n];
        unsigned i;
        for (i=0;i<n;i++) {
            replytags[i] = TAG_NULL;
            intertags[i] = TAG_NULL;
        }
        try {
            for (i=0;i<n;i++) {
                if (abortSoon)
                    return;
                CMessageBuffer mb;
#ifdef _TRACE
                ActPrintLog("Merge process, Receiving on tag %d",replyTag);
#endif
                rank_t sender;
                if (!receiveMsg(mb, RANK_ALL, replyTag, &sender)||abortSoon) 
                    return;
#ifdef _TRACE
                ActPrintLog("Merge process, Received sample from %d",sender);
#endif
                sender--;
                assertex((unsigned)sender<n);
                assertex(replytags[(unsigned)sender]==TAG_NULL);
                deserializeMPtag(mb,replytags[(unsigned)sender]);
                deserializeMPtag(mb,intertags[(unsigned)sender]);
                sample.deserialize(mb,true);
            }
            ActPrintLog("GlobalMergeActivityMaster::process samples merged");
            sample.createSortedPartition(n);
            ActPrintLog("GlobalMergeActivityMaster::process partition generated");
            for (i=0;i<n;i++) {
                if (abortSoon)
                    break;
                CMessageBuffer mb;
                mb.append(n);
                for (unsigned j = 0;j<n;j++)
                    serializeMPtag(mb,intertags[j]);
                sample.serialize(mb);
#ifdef _TRACE
                ActPrintLog("Merge process, Replying to node %d tag %d",i+1,replytags[i]);
#endif
                if (!queryJobChannel().queryJobComm().send(mb, (rank_t)i+1, replytags[i]))
                    break;
            }
        
        }
        catch (IException *e) {
            delete [] replytags;
            delete [] intertags;
            ActPrintLog(e, "MERGE");
            throw;
        }
        delete [] replytags;
        delete [] intertags;
        ActPrintLog("GlobalMergeActivityMaster::process exit");
    }