Ejemplo n.º 1
0
void MatrixValueElement::convertTextToMatrix(std::string text) {
    std::string temp=text;
    std::string row;
    int order=0;
    std::vector<int> rvalues;
    int i=1;
    for(i=1; temp.find(":")!=-1; i++) {
        row=temp.substr(0,temp.find(":")-1);
        rvalues=rowBuilder(row);
        if(order==0||rvalues.size()==order)
            order=rvalues.size();
        else {
            m.clear();
            std::cout<<"\nInvalid Matrix Order";
            return;
        }
        m.push_back(rvalues);
        temp.erase(0,temp.find(":")+1);
    }
    if(temp.find("}")==temp.length()-1) {
        row=temp.substr(0,temp.find("}"));
        rvalues=rowBuilder(row);
        if(order==0||rvalues.size()==order)
            order=rvalues.size();
        else {
            m.clear();
            std::cout<<"\nInvalid Matrix Order";
            return;
        }
        m.push_back(rvalues);
        temp.clear();
    }
}
    const void *getResult(const void *firstRow)
    {
        IHThorAggregateArg *helper = (IHThorAggregateArg *)baseHelper.get();
        unsigned numPartialResults = container.queryJob().querySlaves();
        if (1 == numPartialResults)
            return firstRow;

        CThorExpandingRowArray partialResults(*this, this, true, stableSort_none, true, numPartialResults);
        if (hadElement)
            partialResults.setRow(0, firstRow);
        --numPartialResults;

        size32_t sz;
        while (numPartialResults--)
        {
            CMessageBuffer msg;
            rank_t sender;
            if (!receiveMsg(msg, RANK_ALL, mpTag, &sender))
                return NULL;
            if (abortSoon)
                return NULL;
            msg.read(sz);
            if (sz)
            {
                assertex(NULL == partialResults.query(sender-1));
                CThorStreamDeserializerSource mds(sz, msg.readDirect(sz));
                RtlDynamicRowBuilder rowBuilder(queryRowAllocator());
                size32_t sz = queryRowDeserializer()->deserialize(rowBuilder, mds);
                partialResults.setRow(sender-1, rowBuilder.finalizeRowClear(sz));
            }
        }
        RtlDynamicRowBuilder rowBuilder(queryRowAllocator(), false);
        bool first = true;
        numPartialResults = container.queryJob().querySlaves();
        unsigned p=0;
        for (;p<numPartialResults; p++)
        {
            const void *row = partialResults.query(p);
            if (row)
            {
                if (first)
                {
                    first = false;
                    sz = cloneRow(rowBuilder, row, queryRowMetaData());
                }
                else
                    sz = helper->mergeAggregate(rowBuilder, row);
            }
        }
        if (first)
            sz = helper->clearAggregate(rowBuilder);
        return rowBuilder.finalizeRowClear(sz);
    }
Ejemplo n.º 3
0
 virtual byte * getRowResult(IEngineRowAllocator * _resultAllocator)
 {
     RtlDynamicRowBuilder rowBuilder(_resultAllocator);
     SqLite3RowBuilder sqliteRowBuilder(stmt);
     const RtlTypeInfo *typeInfo = _resultAllocator->queryOutputMeta()->queryTypeInfo();
     assertex(typeInfo);
     RtlFieldStrInfo dummyField("<row>", NULL, typeInfo);
     size32_t len = typeInfo->build(rowBuilder, 0, &dummyField, sqliteRowBuilder);
     return (byte *) rowBuilder.finalizeRowClear(len);
 }
Ejemplo n.º 4
0
 inline void deserialize(IRowInterfaces *rowif, size32_t memsz, const void *mem)
 {
     if (memsz) {
         RtlDynamicRowBuilder rowBuilder(rowif->queryRowAllocator());
         //GH->NH This now has a higher overhead than you are likely to want at this point...
         CThorStreamDeserializerSource dsz(memsz,mem);
         size32_t size = rowif->queryRowDeserializer()->deserialize(rowBuilder,dsz);
         setown(rowBuilder.finalizeRowClear(size));
     }
     else
         clear();
 }
Ejemplo n.º 5
0
void RtlLimitedFixedDatasetBuilder::flushDataset()
{
    if (rowCreator)
    {
        while (totalSize < maxRows * recordSize)
        {
            createRow();
            size32_t size = rowCreator(rowBuilder(), ctx);
            finalizeRow(size);
        }
    }
    RtlFixedDatasetBuilder::flushDataset();
}
Ejemplo n.º 6
0
void RtlLimitedVariableDatasetBuilder::flushDataset()
{
    if (rowCreator)
    {
        while (numRows < maxRows)
        {
            createRow();
            size32_t thisSize = rowCreator(rowBuilder(), ctx);
            finalizeRow(thisSize);
        }
    }
    RtlVariableDatasetBuilder::flushDataset();
}
Ejemplo n.º 7
0
 const void * getResult()
 {
     CMessageBuffer mb;
     if (receiveMsg(mb, 0, mpTag)) {
         size32_t sz;
         mb.read(sz);
         if (sz) {
             CThorStreamDeserializerSource ds(sz,mb.readDirect(sz));
             RtlDynamicRowBuilder rowBuilder(queryRowAllocator());
             size32_t sz = queryRowDeserializer()->deserialize(rowBuilder,ds);
             return rowBuilder.finalizeRowClear(sz);
         }
     }
     return NULL;
 }
Ejemplo n.º 8
0
const void *CPartialResultAggregator::getResult()
{
    CMessageBuffer mb;
    if (activity.receiveMsg(mb, 0, activity.queryMpTag()))
    {
        if (mb.length())
        {
            CThorStreamDeserializerSource ds(mb.length(), mb.readDirect(mb.length()));
            RtlDynamicRowBuilder rowBuilder(activity.queryRowAllocator());
            size32_t sz = activity.queryRowDeserializer()->deserialize(rowBuilder,ds);
            return rowBuilder.finalizeRowClear(sz);
        }
    }
    return NULL;
}
Ejemplo n.º 9
0
 virtual const void *nextRow()
 {
     if (!stmt)
         return NULL;
     int rc = sqlite3_step(stmt);
     if (rc != SQLITE_ROW)
     {
         stmt.clear();
         return NULL;
     }
     RtlDynamicRowBuilder rowBuilder(resultAllocator);
     SqLite3RowBuilder sqliteRowBuilder(stmt);
     const RtlTypeInfo *typeInfo = resultAllocator->queryOutputMeta()->queryTypeInfo();
     assertex(typeInfo);
     RtlFieldStrInfo dummyField("<row>", NULL, typeInfo);
     size32_t len = typeInfo->build(rowBuilder, 0, &dummyField, sqliteRowBuilder);
     return rowBuilder.finalizeRowClear(len);
 }
Ejemplo n.º 10
0
 const void *getRow(rowcount_t pos)
 {
     CriticalBlock block(crit);
     offset_t ofs[2]; // JCSMORE doesn't really need 2, only to verify read right amount below
     size32_t rd = _readOverflowPos(pos, 2, ofs, false);
     assertex(rd==sizeof(ofs));
     size32_t idxSz = (size32_t)(ofs[1]-ofs[0]);
     if (!dataFileIO)
     {
         dataFileIO.setown(dataFile->open(IFOread));
         dataFileStream.setown(createFileSerialStream(dataFileIO));
         dataFileDeserializerSource.setStream(dataFileStream);
     }
     dataFileStream->reset(ofs[0], idxSz);
     RtlDynamicRowBuilder rowBuilder(rowIf->queryRowAllocator());
     size32_t sz = rowIf->queryRowDeserializer()->deserialize(rowBuilder, dataFileDeserializerSource);
     return rowBuilder.finalizeRowClear(sz);
 }
Ejemplo n.º 11
0
    CATCH_NEXTROW()
    {
        ActivityTimer t(totalCycles, timeActivities, NULL);
        if (abortSoon || rowSource.eos())
            return NULL;

        if (eogPending)
        {
            eogPending = false;
            return NULL;
        }

        RtlDynamicRowBuilder rowBuilder(queryRowAllocator());
        size32_t sz = queryRowDeserializer()->deserialize(rowBuilder,rowSource);
        if (grouped)
            rowSource.read(sizeof(bool), &eogPending);
        dataLinkIncrement();
        return rowBuilder.finalizeRowClear(sz);
    }
Ejemplo n.º 12
0
void CSteppedConjunctionOptimizer::beforeProcessing()
{
    //NB: This function is only called once, after we have decided it is worth processing.
    assertex(!eof);     // just check it isn't called more than once
    assertex(numInputs);

    bool hasDistance = (helper.getJoinFlags() & IHThorNWayMergeJoinArg::MJFhasdistance) != 0;
    for (unsigned i3 = 0; i3 < numInputs; i3++)
    {
        OrderedInput & next = *new OrderedInput(inputs.item(i3), i3, hasDistance);
        orderedInputs.append(next);
        if (next.canOptimizeOrder())
            numOptimizeInputs++;
    }
    //Sort so that inputs are ordered (priority-inputs, optimizable, non-optimizable)
    orderedInputs.sort(compareInitialInputOrder);

    //If only a single re-orderable input, treat it as unorderable.
    if (numOptimizeInputs == 1)
    {
        assertex(orderedInputs.item(numPriorityInputs).canOptimizeOrder());
        orderedInputs.item(numPriorityInputs).stopOptimizeOrder();
        numOptimizeInputs = 0;
    }

    maxOptimizeInput = numPriorityInputs + numOptimizeInputs;

    associateRemoteInputs(orderedInputs, numPriorityInputs);
    
    //MORE: If some inputs have known priority, and other remote inputs don't, then we could consider
    //      connecting the unknown inputs to the last known inputs.
    ForEachItemIn(i4, joins)
        joins.item(i4).markRestrictedJoin(numEqualFields);

    assertex(helper.getJoinFlags() & IHThorNWayMergeJoinArg::MJFhasclearlow);       // Don't support (very) old workunits that don't define this..
    if (helper.getJoinFlags() & IHThorNWayMergeJoinArg::MJFhasclearlow)
    {
        RtlDynamicRowBuilder rowBuilder(inputAllocator);
        size32_t size = helper.createLowInputRow(rowBuilder);
        lowestSeekRow = rowBuilder.finalizeRowClear(size);
    }
}
Ejemplo n.º 13
0
const void *getAggregate(CActivityBase &activity, unsigned partialResults, IRowInterfaces &rowIf, IHThorCompoundAggregateExtra &aggHelper, mptag_t mpTag)
{
    // JCSMORE - pity this isn't common routine with similar one in aggregate, but helper is not common
    CThorRowArray slaveResults;
    slaveResults.ensure(partialResults);
    unsigned _partialResults = partialResults;
    while (_partialResults--)
    {
        CMessageBuffer mb;
        rank_t sender;
        if (!activity.receiveMsg(mb, RANK_ALL, mpTag, &sender)) return false;
        if (activity.queryAbortSoon()) return 0;
        if (mb.length())
        {
            CThorStreamDeserializerSource ds(mb.length(), mb.readDirect(mb.length()));
            RtlDynamicRowBuilder rowBuilder(rowIf.queryRowAllocator());
            size32_t sz = rowIf.queryRowDeserializer()->deserialize(rowBuilder, ds);
            slaveResults.setRow(sender-1, rowBuilder.finalizeRowClear(sz));
        }
    }
    RtlDynamicRowBuilder result(rowIf.queryRowAllocator(), false);
    size32_t sz;
    bool first = true;
    _partialResults = 0;
    for (;_partialResults<partialResults; _partialResults++)
    {
        const void *partialResult = slaveResults.item(_partialResults);
        if (partialResult)
        {
            if (first)
            {
                first = false;
                sz = cloneRow(result, slaveResults.item(_partialResults), rowIf.queryRowMetaData());
            }
            else
                sz = aggHelper.mergeAggregate(result, partialResult);
        }
    }
    if (first)
        sz = aggHelper.clearAggregate(result);
    return result.finalizeRowClear(sz);
}
Ejemplo n.º 14
0
 const void * CouchbaseRowStream::nextRow()
 {
     const void * result = NULL;
     if (m_shouldRead && m_currentRow < m_Rows.length())
     {
         auto json = m_Rows.item(m_currentRow++);
         Owned<IPropertyTree> contentTree = createPTreeFromJSONString(json,ipt_caseInsensitive);
         if (contentTree)
         {
             CouchbaseRowBuilder * cbRowBuilder = new CouchbaseRowBuilder(contentTree);
             RtlDynamicRowBuilder rowBuilder(m_resultAllocator);
             const RtlTypeInfo *typeInfo = m_resultAllocator->queryOutputMeta()->queryTypeInfo();
             assertex(typeInfo);
             RtlFieldStrInfo dummyField("<row>", NULL, typeInfo);
             size32_t len = typeInfo->build(rowBuilder, 0, &dummyField, *cbRowBuilder);
             return rowBuilder.finalizeRowClear(len);
         }
         else
             failx("Error processing result row");
     }
     return result;
 }
Ejemplo n.º 15
0
    virtual void process()
    {
        CMasterActivity::process();

        bool results = false;
        unsigned nslaves = container.queryJob().querySlaves();
        while (nslaves--)
        {
            CMessageBuffer mb;
            if (abortSoon || !receiveMsg(mb, RANK_ALL, replyTag, NULL)) break;
            StringBuffer str;
            mb.getSender().getUrlStr(str);
            size32_t sz;
            mb.read(sz);
            if (sz)
            {
                if (results)
                    throw MakeThorException(TE_UnexpectedMultipleSlaveResults, "Received greater than one result from slaves");
                IHThorRemoteResultArg *helper = (IHThorRemoteResultArg *)queryHelper();
                Owned<IThorRowInterfaces> resultRowIf = createRowInterfaces(helper->queryOutputMeta());
                CThorStreamDeserializerSource mds(sz, mb.readDirect(sz));
                RtlDynamicRowBuilder rowBuilder(resultRowIf->queryRowAllocator());
                size32_t sz = resultRowIf->queryRowDeserializer()->deserialize(rowBuilder, mds);
                OwnedConstThorRow result = rowBuilder.finalizeRowClear(sz);
                helper->sendResult(result);
                results = true;
            }
        }
        if (!results && !abortSoon)
        {
            ActPrintLog("WARNING: no results");
            IHThorRemoteResultArg *helper = (IHThorRemoteResultArg *)queryHelper();
            //helper->sendResult(NULL);
            // Jake I think this always cores (so raise exception instead)
            throw MakeThorException(TE_UnexpectedMultipleSlaveResults, "Received no results from slaves");

        }
    }
Ejemplo n.º 16
0
    STRAND_CATCH_NEXTROW()
    {
        ActivityTimer t(totalCycles, timeActivities);
        loop
        {
            if (parent.queryAbortSoon())
                return nullptr;
            OwnedConstThorRow in = inputStream->nextRow();
            if (!in)
            {
                if (numProcessedLastGroup == rowsProcessed)
                    in.setown(inputStream->nextRow());
                if (!in)
                {
                    numProcessedLastGroup = rowsProcessed;
                    return nullptr;
                }
            }

            RtlDynamicRowBuilder rowBuilder(allocator);
            size32_t outSize;
            try
            {
                outSize = helper->transform(rowBuilder, in);
            }
            catch (IException *e)
            {
                parent.ActPrintLog(e, "In helper->transform()");
                throw;
            }
            if (outSize)
            {
                rowsProcessed++;
                return rowBuilder.finalizeRowClear(outSize);
            }
        }
    }
Ejemplo n.º 17
0
    virtual void process() override
    {
        ActPrintLog("INDEXWRITE: Start");
        init();

        IRowStream *stream = inputStream;
        ThorDataLinkMetaInfo info;
        input->getMetaInfo(info);
        outRowAllocator.setown(getRowAllocator(helper->queryDiskRecordSize()));
        start();
        if (refactor)
        {
            assertex(isLocal);
            if (active)
            {
                unsigned targetWidth = partDesc->queryOwner().numParts()-(buildTlk?1:0);
                assertex(0 == container.queryJob().querySlaves() % targetWidth);
                unsigned partsPerNode = container.queryJob().querySlaves() / targetWidth;
                unsigned myPart = queryJobChannel().queryMyRank();

                IArrayOf<IRowStream> streams;
                streams.append(*LINK(stream));
                --partsPerNode;

 // Should this be merging 1,11,21,31 etc.
                unsigned p=0;
                unsigned fromPart = targetWidth+1 + (partsPerNode * (myPart-1));
                for (; p<partsPerNode; p++)
                {
                    streams.append(*createRowStreamFromNode(*this, fromPart++, queryJobChannel().queryJobComm(), mpTag, abortSoon));
                }
                ICompare *icompare = helper->queryCompare();
                assertex(icompare);
                Owned<IRowLinkCounter> linkCounter = new CThorRowLinkCounter;
                myInputStream.setown(createRowStreamMerger(streams.ordinality(), streams.getArray(), icompare, false, linkCounter));
                stream = myInputStream;
            }
            else // serve nodes, creating merged parts
                rowServer.setown(createRowServer(this, stream, queryJobChannel().queryJobComm(), mpTag));
        }
        processed = THORDATALINK_STARTED;

        // single part key support
        // has to serially pull all data fron nodes 2-N
        // nodes 2-N, could/should start pushing some data (as it's supposed to be small) to cut down on serial nature.
        unsigned node = queryJobChannel().queryMyRank();
        if (singlePartKey)
        {
            if (1 == node)
            {
                try
                {
                    open(*partDesc, false, helper->queryDiskRecordSize()->isVariableSize());
                    for (;;)
                    {
                        OwnedConstThorRow row = inputStream->ungroupedNextRow();
                        if (!row)
                            break;
                        if (abortSoon) return;
                        processRow(row);
                    }

                    unsigned node = 2;
                    while (node <= container.queryJob().querySlaves())
                    {
                        Linked<IOutputRowDeserializer> deserializer = ::queryRowDeserializer(input);
                        CMessageBuffer mb;
                        Owned<ISerialStream> stream = createMemoryBufferSerialStream(mb);
                        CThorStreamDeserializerSource rowSource;
                        rowSource.setStream(stream);
                        bool successSR;
                        for (;;)
                        {
                            {
                                BooleanOnOff tf(receivingTag2);
                                successSR = queryJobChannel().queryJobComm().sendRecv(mb, node, mpTag2);
                            }
                            if (successSR)
                            {
                                if (rowSource.eos())
                                    break;
                                Linked<IEngineRowAllocator> allocator = ::queryRowAllocator(input);
                                do
                                {
                                    RtlDynamicRowBuilder rowBuilder(allocator);
                                    size32_t sz = deserializer->deserialize(rowBuilder, rowSource);
                                    OwnedConstThorRow fRow = rowBuilder.finalizeRowClear(sz);
                                    processRow(fRow);
                                }
                                while (!rowSource.eos());
                            }
                        }
                        node++;
                    }
                }
                catch (CATCHALL)
                {
                    close(*partDesc, partCrc, true);
                    throw;
                }
                close(*partDesc, partCrc, true);
                stop();
            }
            else
            {
                CMessageBuffer mb;
                CMemoryRowSerializer mbs(mb);
                Linked<IOutputRowSerializer> serializer = ::queryRowSerializer(input);
                for (;;)
                {
                    BooleanOnOff tf(receivingTag2);
                    if (queryJobChannel().queryJobComm().recv(mb, 1, mpTag2)) // node 1 asking for more..
                    {
                        if (abortSoon) break;
                        mb.clear();
                        do
                        {
                            OwnedConstThorRow row = inputStream->ungroupedNextRow();
                            if (!row) break;
                            serializer->serialize(mbs, (const byte *)row.get());
                        } while (mb.length() < SINGLEPART_KEY_TRANSFER_SIZE); // NB: at least one row
                        if (!queryJobChannel().queryJobComm().reply(mb))
                            throw MakeThorException(0, "Failed to send index data to node 1, from node %d", node);
                        if (0 == mb.length())
                            break;
                    }
                }
            }
        }
        else
        {
            if (!refactor || active)
            {
                try
                {
                    StringBuffer partFname;
                    getPartFilename(*partDesc, 0, partFname);
                    ActPrintLog("INDEXWRITE: process: handling fname : %s", partFname.str());
                    open(*partDesc, false, helper->queryDiskRecordSize()->isVariableSize());
                    ActPrintLog("INDEXWRITE: write");

                    BooleanOnOff tf(receiving);
                    if (!refactor || !active)
                        receiving = false;
                    do
                    {
                        OwnedConstThorRow row = inputStream->ungroupedNextRow();
                        if (!row)
                            break;
                        processRow(row);
                    } while (!abortSoon);
                    ActPrintLog("INDEXWRITE: write level 0 complete");
                }
                catch (CATCHALL)
                {
                    close(*partDesc, partCrc, isLocal && !buildTlk && 1 == node);
                    throw;
                }
                close(*partDesc, partCrc, isLocal && !buildTlk && 1 == node);
                stop();

                ActPrintLog("INDEXWRITE: Wrote %" RCPF "d records", processed & THORDATALINK_COUNT_MASK);

                if (buildTlk)
                {
                    ActPrintLog("INDEXWRITE: sending rows");
                    NodeInfoArray tlkRows;

                    CMessageBuffer msg;
                    if (firstNode())
                    {
                        if (processed & THORDATALINK_COUNT_MASK)
                        {
                            if (enableTlkPart0)
                                tlkRows.append(* new CNodeInfo(0, firstRow.get(), firstRowSize, totalCount));
                            tlkRows.append(* new CNodeInfo(1, lastRow.get(), lastRowSize, totalCount));
                        }
                    }
                    else
                    {
                        if (processed & THORDATALINK_COUNT_MASK)
                        {
                            CNodeInfo row(queryJobChannel().queryMyRank(), lastRow.get(), lastRowSize, totalCount);
                            row.serialize(msg);
                        }
                        queryJobChannel().queryJobComm().send(msg, 1, mpTag);
                    }

                    if (firstNode())
                    {
                        ActPrintLog("INDEXWRITE: Waiting on tlk to complete");

                        // JCSMORE if refactor==true, is rowsToReceive here right??
                        unsigned rowsToReceive = (refactor ? (tlkDesc->queryOwner().numParts()-1) : container.queryJob().querySlaves()) -1; // -1 'cos got my own in array already
                        ActPrintLog("INDEXWRITE: will wait for info from %d slaves before writing TLK", rowsToReceive);
                        while (rowsToReceive--)
                        {
                            msg.clear();
                            receiveMsg(msg, RANK_ALL, mpTag); // NH->JCS RANK_ALL_OTHER not supported for recv
                            if (abortSoon)
                                return;
                            if (msg.length())
                            {
                                CNodeInfo *ni = new CNodeInfo();
                                ni->deserialize(msg);
                                tlkRows.append(*ni);
                            }
                        }
                        tlkRows.sort(CNodeInfo::compare);

                        StringBuffer path;
                        getPartFilename(*tlkDesc, 0, path);
                        ActPrintLog("INDEXWRITE: creating toplevel key file : %s", path.str());
                        try
                        {
                            open(*tlkDesc, true, helper->queryDiskRecordSize()->isVariableSize());
                            if (tlkRows.length())
                            {
                                CNodeInfo &lastNode = tlkRows.item(tlkRows.length()-1);
                                memset(lastNode.value, 0xff, lastNode.size);
                            }
                            ForEachItemIn(idx, tlkRows)
                            {
                                CNodeInfo &info = tlkRows.item(idx);
                                builder->processKeyData((char *)info.value, info.pos, info.size);
                            }
                            close(*tlkDesc, tlkCrc, true);
                        }
                        catch (CATCHALL)
                        {
                            abortSoon = true;
                            close(*tlkDesc, tlkCrc, true);
                            removeFiles(*partDesc);
                            throw;
                        }
                    }
                }
                else if (!isLocal && firstNode())
Ejemplo n.º 18
0
void CSteppedConjunctionOptimizer::beforeProcessing()
{
    //NB: This function is only called once, after we have decided it is worth processing.
    assertex(!eof);     // just check it isn't called more than once
    assertex(numInputs);

    bool hasDistance = (helper.getJoinFlags() & IHThorNWayMergeJoinArg::MJFhasdistance) != 0;
    for (unsigned i3 = 0; i3 < numInputs; i3++)
    {
        OrderedInput & next = *new OrderedInput(inputs.item(i3), i3, hasDistance);
        orderedInputs.append(next);
        if (next.canOptimizeOrder())
            numOptimizeInputs++;
    }
    //Sort so that inputs are ordered (priority-inputs, optimizable, non-optimizable)
    orderedInputs.sort(compareInitialInputOrder);

    //If only a single re-orderable input, treat it as unorderable.
    if (numOptimizeInputs == 1)
    {
        assertex(orderedInputs.item(numPriorityInputs).canOptimizeOrder());
        orderedInputs.item(numPriorityInputs).stopOptimizeOrder();
        numOptimizeInputs = 0;
    }

    maxOptimizeInput = numPriorityInputs + numOptimizeInputs;
    //If we know for sure the primary input, then tag it as worth reading ahead - otherwise it will be dynamically set later.
    if (numPriorityInputs > 0)
    {
        orderedInputs.item(0).setReadAhead(true);
        orderedInputs.item(0).setAlwaysReadExact();
    }

    //Work out the last input of known priority whis is read remotely.
    unsigned maxPriorityRemote = numPriorityInputs;
    while ((maxPriorityRemote >= 2) && !orderedInputs.item(maxPriorityRemote-1).readsRowsRemotely())
        maxPriorityRemote--;

    //If the second ordered input is known to be read remotely, then we want to send multiple seek requests at the same time.
    //MORE: Maybe we should consider doing this to all other inputs if only one priority input is known.
    if (maxPriorityRemote >= 2)
    {
        for (unsigned i=1; i < maxPriorityRemote; i++)
        {
            IMultipleStepSeekInfo * seekInfo = orderedInputs.item(i-1).createMutipleReadWrapper();
            orderedInputs.item(i).createMultipleSeekWrapper(seekInfo);
        }
    }
    
    //MORE: If some inputs have known priroity, and other remote inputs don't, then we could consider 
    //      connecting the unknown inputs to the last known inputs.
    ForEachItemIn(i4, joins)
        joins.item(i4).markRestrictedJoin(numEqualFields);

    assertex(helper.getJoinFlags() & IHThorNWayMergeJoinArg::MJFhasclearlow);       // Don't support (very) old workunits that don't define this..
    if (helper.getJoinFlags() & IHThorNWayMergeJoinArg::MJFhasclearlow)
    {
        RtlDynamicRowBuilder rowBuilder(inputAllocator);
        size32_t size = helper.createLowInputRow(rowBuilder);
        lowestSeekRow = rowBuilder.finalizeRowClear(size);
    }
}
Ejemplo n.º 19
0
    const void* KafkaStreamedDataset::nextRow()
    {
        const void* result = NULL;
        __int32 maxAttempts = 10;   //!< Maximum number of tries if local queue is full
        __int32 timeoutWait = 100;  //!< Amount of time to wait between retries
        __int32 attemptNum = 0;

        if (maxRecords <= 0 || consumedRecCount < maxRecords)
        {
            RdKafka::Message* messageObjPtr = NULL;
            bool messageConsumed = false;

            while (!messageConsumed && shouldRead && attemptNum < maxAttempts)
            {
                messageObjPtr = consumerPtr->getOneMessage(); // messageObjPtr must be deleted when we are through with it

                if (messageObjPtr)
                {
                    try
                    {
                        switch (messageObjPtr->err())
                        {
                            case RdKafka::ERR_NO_ERROR:
                                {
                                    RtlDynamicRowBuilder rowBuilder(resultAllocator);
                                    unsigned len = sizeof(__int32) + sizeof(__int64) + sizeof(size32_t) + messageObjPtr->len();
                                    byte* row = rowBuilder.ensureCapacity(len, NULL);

                                    // Populating this structure:
                                    //  EXPORT KafkaMessage := RECORD
                                    //      UNSIGNED4   partitionNum;
                                    //      UNSIGNED8   offset;
                                    //      STRING      message;
                                    //  END;

                                    *(__int32*)(row) = messageObjPtr->partition();
                                    *(__int64*)(row + sizeof(__int32)) = messageObjPtr->offset();
                                    *(size32_t*)(row + sizeof(__int32) + sizeof(__int64)) = messageObjPtr->len();
                                    memcpy(row + sizeof(__int32) + sizeof(__int64) + sizeof(size32_t), messageObjPtr->payload(), messageObjPtr->len());

                                    result = rowBuilder.finalizeRowClear(len);

                                    lastMsgOffset = messageObjPtr->offset();
                                    ++consumedRecCount;

                                    // Give opportunity for consumer to pull in any additional messages
                                    consumerPtr->handle()->poll(0);

                                    // Mark as loaded so we don't retry
                                    messageConsumed = true;
                                }
                                break;

                            case RdKafka::ERR__TIMED_OUT:
                                // No new messages arrived and we timed out waiting
                                ++attemptNum;
                                consumerPtr->handle()->poll(timeoutWait);
                                break;

                            case RdKafka::ERR__PARTITION_EOF:
                                // We reached the end of the messages in the partition
                                if (traceLevel > 4)
                                {
                                    DBGLOG("Kafka: EOF reading message from partition %d", messageObjPtr->partition());
                                }
                                shouldRead = false;
                                break;

                            case RdKafka::ERR__UNKNOWN_PARTITION:
                                // Unknown partition; don't throw an error here because
                                // in some configurations (e.g. more Thor slaves than
                                // partitions) not all consumers will have a partition
                                // to read
                                if (traceLevel > 4)
                                {
                                    DBGLOG("Kafka: Unknown partition while trying to read");
                                }
                                shouldRead = false;
                                break;

                            case RdKafka::ERR__UNKNOWN_TOPIC:
                                throw MakeStringException(-1, "Kafka: Error while reading message: '%s'", messageObjPtr->errstr().c_str());
                                break;
                        }
                    }
                    catch (...)
                    {
                        delete(messageObjPtr);
                        throw;
                    }

                    delete(messageObjPtr);
                    messageObjPtr = NULL;
                }
            }
        }

        return result;
    }
Ejemplo n.º 20
0
    virtual void process() override
    {
        ActPrintLog("INDEXWRITE: Start");
        init();

        IRowStream *stream = inputStream;
        ThorDataLinkMetaInfo info;
        input->getMetaInfo(info);
        outRowAllocator.setown(getRowAllocator(helper->queryDiskRecordSize()));
        start();
        if (refactor)
        {
            assertex(isLocal);
            if (active)
            {
                unsigned targetWidth = partDesc->queryOwner().numParts()-(buildTlk?1:0);
                assertex(0 == container.queryJob().querySlaves() % targetWidth);
                unsigned partsPerNode = container.queryJob().querySlaves() / targetWidth;
                unsigned myPart = queryJobChannel().queryMyRank();

                IArrayOf<IRowStream> streams;
                streams.append(*LINK(stream));
                --partsPerNode;

 // Should this be merging 1,11,21,31 etc.
                unsigned p=0;
                unsigned fromPart = targetWidth+1 + (partsPerNode * (myPart-1));
                for (; p<partsPerNode; p++)
                {
                    streams.append(*createRowStreamFromNode(*this, fromPart++, queryJobChannel().queryJobComm(), mpTag, abortSoon));
                }
                ICompare *icompare = helper->queryCompare();
                assertex(icompare);
                Owned<IRowLinkCounter> linkCounter = new CThorRowLinkCounter;
                myInputStream.setown(createRowStreamMerger(streams.ordinality(), streams.getArray(), icompare, false, linkCounter));
                stream = myInputStream;
            }
            else // serve nodes, creating merged parts
                rowServer.setown(createRowServer(this, stream, queryJobChannel().queryJobComm(), mpTag));
        }
        processed = THORDATALINK_STARTED;

        // single part key support
        // has to serially pull all data fron nodes 2-N
        // nodes 2-N, could/should start pushing some data (as it's supposed to be small) to cut down on serial nature.
        unsigned node = queryJobChannel().queryMyRank();
        if (singlePartKey)
        {
            if (1 == node)
            {
                try
                {
                    open(*partDesc, false, helper->queryDiskRecordSize()->isVariableSize());
                    loop
                    {
                        OwnedConstThorRow row = inputStream->ungroupedNextRow();
                        if (!row)
                            break;
                        if (abortSoon) return;
                        processRow(row);
                    }

                    unsigned node = 2;
                    while (node <= container.queryJob().querySlaves())
                    {
                        Linked<IOutputRowDeserializer> deserializer = ::queryRowDeserializer(input);
                        CMessageBuffer mb;
                        Owned<ISerialStream> stream = createMemoryBufferSerialStream(mb);
                        CThorStreamDeserializerSource rowSource;
                        rowSource.setStream(stream);
                        bool successSR;
                        loop
                        {
                            {
                                BooleanOnOff tf(receivingTag2);
                                successSR = queryJobChannel().queryJobComm().sendRecv(mb, node, mpTag2);
                            }
                            if (successSR)
                            {
                                if (rowSource.eos())
                                    break;
                                Linked<IEngineRowAllocator> allocator = ::queryRowAllocator(input);
                                do
                                {
                                    RtlDynamicRowBuilder rowBuilder(allocator);
                                    size32_t sz = deserializer->deserialize(rowBuilder, rowSource);
                                    OwnedConstThorRow fRow = rowBuilder.finalizeRowClear(sz);
                                    processRow(fRow);
                                }
                                while (!rowSource.eos());
                            }
                        }
                        node++;
                    }
                }
                catch (CATCHALL)
                {
                    close(*partDesc, partCrc, true);
                    throw;
                }
                close(*partDesc, partCrc, true);
                doStopInput();
            }
            else
            {
                CMessageBuffer mb;
                CMemoryRowSerializer mbs(mb);
                Linked<IOutputRowSerializer> serializer = ::queryRowSerializer(input);
                loop
                {
                    BooleanOnOff tf(receivingTag2);
                    if (queryJobChannel().queryJobComm().recv(mb, 1, mpTag2)) // node 1 asking for more..
                    {
                        if (abortSoon) break;
                        mb.clear();
                        do
                        {
                            OwnedConstThorRow row = inputStream->ungroupedNextRow();
                            if (!row) break;
                            serializer->serialize(mbs, (const byte *)row.get());
                        } while (mb.length() < SINGLEPART_KEY_TRANSFER_SIZE); // NB: at least one row
                        if (!queryJobChannel().queryJobComm().reply(mb))
                            throw MakeThorException(0, "Failed to send index data to node 1, from node %d", node);
                        if (0 == mb.length())
                            break;
                    }
                }
            }
        }
Ejemplo n.º 21
0
void RtlVariableDatasetBuilder::deserializeRow(IOutputRowDeserializer & deserializer, IRowDeserializerSource & in)
{
    createRow();
    size32_t rowSize = deserializer.deserialize(rowBuilder(), in);
    finalizeRow(rowSize);
}