Example #1
0
    virtual void init()
    {
        HashDistributeMasterBase::init();

        // JCSMORE should common up some with indexread
        IHThorKeyedDistributeArg *helper = (IHThorKeyedDistributeArg *)queryHelper();

        StringBuffer scoped;
        OwnedRoxieString indexFileName(helper->getIndexFileName());
        queryThorFileManager().addScope(container.queryJob(), indexFileName, scoped);
        file.setown(queryThorFileManager().lookup(container.queryJob(), indexFileName));
        if (!file)
            throw MakeActivityException(this, 0, "KeyedDistribute: Failed to find key: %s", scoped.str());
        if (0 == file->numParts())
            throw MakeActivityException(this, 0, "KeyedDistribute: Can't distribute based on an empty key: %s", scoped.str());

        checkFormatCrc(this, file, helper->getFormatCrc(), true);
        Owned<IFileDescriptor> fileDesc = file->getFileDescriptor();
        Owned<IPartDescriptor> tlkDesc = fileDesc->getPart(fileDesc->numParts()-1);
        if (!tlkDesc->queryProperties().hasProp("@kind") || 0 != stricmp("topLevelKey", tlkDesc->queryProperties().queryProp("@kind")))
            throw MakeActivityException(this, 0, "Cannot distribute using a non-distributed key: '%s'", scoped.str());
        unsigned location;
        OwnedIFile iFile;
        StringBuffer filePath;
        if (!getBestFilePart(this, *tlkDesc, iFile, location, filePath))
            throw MakeThorException(TE_FileNotFound, "Top level key part does not exist, for key: %s", file->queryLogicalName());
        OwnedIFileIO iFileIO = iFile->open(IFOread);
        assertex(iFileIO);

        tlkMb.append(iFileIO->size());
        ::read(iFileIO, 0, (size32_t)iFileIO->size(), tlkMb);

        queryThorFileManager().noteFileRead(container.queryJob(), file);
    }
Example #2
0
/**
 * Sends a slave's partial row count to rank 0 (the master, per the error text)
 * on the activity's MP tag.
 *
 * @param activity      slave activity supplying the job communicator and tag
 * @param partialCount  count to transmit
 *
 * Throws a Thor exception if the send reports failure. The final send()
 * argument (5000) is presumably a timeout in milliseconds - TODO confirm.
 */
void sendPartialCount(CSlaveActivity &activity, rowcount_t partialCount)
{
    CMessageBuffer countMsg;
    countMsg.append(partialCount);

    const bool sent = activity.queryContainer().queryJob().queryJobComm().send(countMsg, 0, activity.queryMpTag(), 5000);
    if (sent)
        return;
    throw MakeThorException(0, "Failed to give partial result to master");
}
Example #3
0
 /**
  * Cross-checks the attributes published for the file (via fileDesc) against
  * what the generated helper expects.
  *
  *  - A grouping mismatch only raises a warning.
  *  - For fixed size records, a published non-zero @recordSize that differs
  *    from the coded record size throws TE_RecordSizeMismatch.
  *  - A compression flag on a file not published as compressed raises a
  *    warning once the row size reaches MIN_ROWCOMPRESS_RECSIZE.
  *  - Unless TDRnocrccheck is set, the format CRC is checked.
  *
  * @param file  distributed file passed on to checkFormatCrc()
  */
 virtual void validateFile(IDistributedFile *file)
 {
     IHThorDiskReadBaseArg *diskHelper = (IHThorDiskReadBaseArg *)queryHelper();

     const bool codeGenGrouped = 0 != (TDXgrouped & diskHelper->getFlags());
     const bool dfsGrouped = fileDesc->isGrouped();
     if (dfsGrouped != codeGenGrouped)
     {
         Owned<IException> warning = MakeActivityWarning(&container, TE_GroupMismatch, "DFS and code generated group info. differs: DFS(%s), CodeGen(%s), using DFS info", dfsGrouped?"grouped":"ungrouped", codeGenGrouped?"grouped":"ungrouped");
         queryJobChannel().fireException(warning);
     }

     IOutputMetaData *diskMeta = diskHelper->queryDiskRecordSize()->querySerializedDiskMeta();
     if (diskMeta->isFixedSize())
     {
         // A published size of 0 is treated as "not published" and is not checked.
         size32_t publishedSz = fileDesc->queryProperties().getPropInt("@recordSize");
         if (0 != publishedSz)
         {
             if (dfsGrouped)
                 --publishedSz; // eog byte not to be included in this test.
             if (publishedSz != diskMeta->getMinRecordSize())
                 throw MakeThorException(TE_RecordSizeMismatch, "Published record size %d for file %s, does not match coded record size %d", publishedSz, fileName.get(), diskMeta->getMinRecordSize());
         }

         if (!fileDesc->isCompressed() && (TDXcompress & diskHelper->getFlags()))
         {
             // Row size as stored, i.e. including the eog byte for grouped files.
             size32_t storedRowSz = diskMeta->getMinRecordSize();
             if (dfsGrouped)
                 ++storedRowSz;
             if (storedRowSz >= MIN_ROWCOMPRESS_RECSIZE)
             {
                 Owned<IException> warning = MakeActivityWarning(&container, TE_CompressionMismatch, "Ignoring compression attribute on file '%s', which is not published as compressed in DFS", fileName.get());
                 queryJobChannel().fireException(warning);
             }
         }
     }

     const bool crcCheckWanted = 0 == (TDRnocrccheck & diskHelper->getFlags());
     if (crcCheckWanted)
         checkFormatCrc(this, file, diskHelper->getFormatCrc(), false);
 }
Example #4
0
void CPartialResultAggregator::sendResult(const void *row)
{
    CMessageBuffer mb;
    if (row)
    {
        CMemoryRowSerializer mbs(mb);
        activity.queryRowSerializer()->serialize(mbs,(const byte *)row);
    }
    if (!activity.queryContainer().queryJob().queryJobComm().send(mb, 0, activity.queryMpTag(), 5000))
        throw MakeThorException(0, "Failed to give partial result to master");
}
Example #5
0
 virtual unsigned hash(const void *data)
 {
     if (1 == count)
         return offsetTable[0].index;
     offset_t fpos = iFetchHandler.extractFpos(data);
     if (isLocalFpos(fpos))
         return getLocalFposPart(fpos);
     const void *result = bsearch(&fpos, offsetTable, count, sizeof(FPosTableEntry), slaveLookup);
     if (!result)
         throw MakeThorException(TE_FetchOutOfRange, "FETCH: Offset not found in offset table; fpos=%"I64F"d", fpos);
     return ((FPosTableEntry *)result)->index;
 }
Example #6
0
 virtual void process()
 {
     if (totalCountKnown) return;
     if (container.queryLocalOrGrouped())
         return;
     totalCount = ::getCount(*this, container.queryJob().querySlaves(), stopAfter, mpTag);
     if (totalCount > stopAfter)
         totalCount = stopAfter;
     CMessageBuffer msg;
     msg.append(totalCount);
     if (!queryJobChannel().queryJobComm().send(msg, 1, mpTag, 5000))
         throw MakeThorException(0, "Failed to give result to slave");
 }
Example #7
0
    virtual void process()
    {
        CMasterActivity::process();

        bool results = false;
        unsigned nslaves = container.queryJob().querySlaves();
        while (nslaves--)
        {
            CMessageBuffer mb;
            if (abortSoon || !receiveMsg(mb, RANK_ALL, replyTag, NULL)) break;
            StringBuffer str;
            mb.getSender().getUrlStr(str);
            size32_t sz;
            mb.read(sz);
            if (sz)
            {
                if (results)
                    throw MakeThorException(TE_UnexpectedMultipleSlaveResults, "Received greater than one result from slaves");
                IHThorRemoteResultArg *helper = (IHThorRemoteResultArg *)queryHelper();
                Owned<IThorRowInterfaces> resultRowIf = createRowInterfaces(helper->queryOutputMeta());
                CThorStreamDeserializerSource mds(sz, mb.readDirect(sz));
                RtlDynamicRowBuilder rowBuilder(resultRowIf->queryRowAllocator());
                size32_t sz = resultRowIf->queryRowDeserializer()->deserialize(rowBuilder, mds);
                OwnedConstThorRow result = rowBuilder.finalizeRowClear(sz);
                helper->sendResult(result);
                results = true;
            }
        }
        if (!results && !abortSoon)
        {
            ActPrintLog("WARNING: no results");
            IHThorRemoteResultArg *helper = (IHThorRemoteResultArg *)queryHelper();
            //helper->sendResult(NULL);
            // Jake I think this always cores (so raise exception instead)
            throw MakeThorException(TE_UnexpectedMultipleSlaveResults, "Received no results from slaves");

        }
    }
Example #8
0
    /**
     * Combines the slaves' partial aggregates into one row and sends it,
     * serialized, to rank 1.
     *
     * Returns without sending anything when getAggregate() yields no row.
     * Throws a Thor exception if the send reports failure.
     */
    virtual void process()
    {
        Owned<IThorRowInterfaces> rowIf = createThorRowInterfaces(queryRowManager(), helper->queryOutputMeta(), queryId(), queryCodeContext());
        OwnedConstThorRow result = getAggregate(*this, container.queryJob().querySlaves(), *rowIf, *helper, mpTag);
        if (!result)
            return;
        CMessageBuffer msg;
        CMemoryRowSerializer mbs(msg);
        rowIf->queryRowSerializer()->serialize(mbs, (const byte *)result.get());
        if (!queryJobChannel().queryJobComm().send(msg, 1, mpTag, 5000))
            throw MakeThorException(0, "Failed to give result to slave");
    }
    /**
     * Global self-join: gathers the input into the sorter, waits on the
     * barrier and then starts the merge.
     *
     * @return the merged row stream, or NULL if abortSoon was set once the
     *         gather had finished (the barrier is cancelled in that case).
     *
     * Throws TE_BarrierAborted if the barrier wait fails.
     */
    IRowStream * doGlobalSelfJoin()
    {
#if THOR_TRACE_LEVEL > 5
        ActPrintLog("SELFJOIN: Performing global self-join");
#endif
        sorter->Gather(::queryRowInterfaces(input), input, compare, NULL, NULL, keyserializer, NULL, false, isUnstable(), abortSoon, NULL);
        stopInput(input);
        input = NULL;

        if (abortSoon)
        {
            barrier->cancel();
            return NULL;
        }

        const bool barrierRaised = barrier->wait(false);
        if (!barrierRaised)
        {
            Sleep(1000); // let original error through
            throw MakeThorException(TE_BarrierAborted,"SELFJOIN: Barrier Aborted");
        }

        rowcount_t mergedRows; // filled in by startMerge; not used further here
        return sorter->startMerge(mergedRows);
    }
Example #10
0
    /**
     * Pulls result data from each slave in turn and accumulates it in
     * resultData, flushing as the flush threshold is reached.
     *
     * For each slave (ranks 1..nslaves) a request is sent and a reply read,
     * repeatedly, until the slave answers with an empty message. A non-empty
     * reply holds a row count followed by the row data.
     *
     * Returns silently if a send or receive fails. Throws
     * TE_WorkUnitWriteLimitExceeded when accepting a reply would take the
     * accumulated size past workunitWriteLimit (a limit of 0 disables the check).
     */
    void process()
    {
        CWorkUnitWriteMasterBase::process();

        unsigned nslaves = container.queryJob().querySlaves();

        CMessageBuffer mb;
        unsigned s=0;
        for (; s<nslaves; s++)
        {
            loop
            {
                if (!container.queryJob().queryJobComm().send(mb, s+1, mpTag)) return;
                if (!receiveMsg(mb, s+1, mpTag)) return;
                if (0 == mb.length())
                    break; // this slave has nothing more to give
                unsigned numGot;
                mb.read(numGot);
                unsigned l=mb.remaining();
                if (workunitWriteLimit && totalSize+resultData.length()+l > workunitWriteLimit)
                {
                    // Message reads: "... (limit is set to N megabytes), in result (name=X)"
                    StringBuffer errMsg("Dataset too large to output to workunit (limit is set to ");
                    errMsg.append(workunitWriteLimit/0x100000).append(" megabytes), in result (");
                    if (resultName.length())
                        errMsg.append("name=").append(resultName);
                    else
                        errMsg.append("sequence=").append(resultSeq);
                    errMsg.append(")");
                    throw MakeThorException(TE_WorkUnitWriteLimitExceeded, "%s", errMsg.str());
                }
                resultData.append(l, mb.readDirect(l));
                mb.clear(); // the (now empty) buffer doubles as the next request
                numResults += numGot;

                // A flushThreshold of -1 disables intermediate flushing.
                if (-1 != flushThreshold && resultData.length() >= (unsigned)flushThreshold)
                    flushResults();
            }
        }
        flushResults(true);
    }
    virtual void process() override
    {
        ActPrintLog("INDEXWRITE: Start");
        init();

        IRowStream *stream = inputStream;
        ThorDataLinkMetaInfo info;
        input->getMetaInfo(info);
        outRowAllocator.setown(getRowAllocator(helper->queryDiskRecordSize()));
        start();
        if (refactor)
        {
            assertex(isLocal);
            if (active)
            {
                unsigned targetWidth = partDesc->queryOwner().numParts()-(buildTlk?1:0);
                assertex(0 == container.queryJob().querySlaves() % targetWidth);
                unsigned partsPerNode = container.queryJob().querySlaves() / targetWidth;
                unsigned myPart = queryJobChannel().queryMyRank();

                IArrayOf<IRowStream> streams;
                streams.append(*LINK(stream));
                --partsPerNode;

 // Should this be merging 1,11,21,31 etc.
                unsigned p=0;
                unsigned fromPart = targetWidth+1 + (partsPerNode * (myPart-1));
                for (; p<partsPerNode; p++)
                {
                    streams.append(*createRowStreamFromNode(*this, fromPart++, queryJobChannel().queryJobComm(), mpTag, abortSoon));
                }
                ICompare *icompare = helper->queryCompare();
                assertex(icompare);
                Owned<IRowLinkCounter> linkCounter = new CThorRowLinkCounter;
                myInputStream.setown(createRowStreamMerger(streams.ordinality(), streams.getArray(), icompare, false, linkCounter));
                stream = myInputStream;
            }
            else // serve nodes, creating merged parts
                rowServer.setown(createRowServer(this, stream, queryJobChannel().queryJobComm(), mpTag));
        }
        processed = THORDATALINK_STARTED;

        // single part key support
        // has to serially pull all data fron nodes 2-N
        // nodes 2-N, could/should start pushing some data (as it's supposed to be small) to cut down on serial nature.
        unsigned node = queryJobChannel().queryMyRank();
        if (singlePartKey)
        {
            if (1 == node)
            {
                try
                {
                    open(*partDesc, false, helper->queryDiskRecordSize()->isVariableSize());
                    loop
                    {
                        OwnedConstThorRow row = inputStream->ungroupedNextRow();
                        if (!row)
                            break;
                        if (abortSoon) return;
                        processRow(row);
                    }

                    unsigned node = 2;
                    while (node <= container.queryJob().querySlaves())
                    {
                        Linked<IOutputRowDeserializer> deserializer = ::queryRowDeserializer(input);
                        CMessageBuffer mb;
                        Owned<ISerialStream> stream = createMemoryBufferSerialStream(mb);
                        CThorStreamDeserializerSource rowSource;
                        rowSource.setStream(stream);
                        bool successSR;
                        loop
                        {
                            {
                                BooleanOnOff tf(receivingTag2);
                                successSR = queryJobChannel().queryJobComm().sendRecv(mb, node, mpTag2);
                            }
                            if (successSR)
                            {
                                if (rowSource.eos())
                                    break;
                                Linked<IEngineRowAllocator> allocator = ::queryRowAllocator(input);
                                do
                                {
                                    RtlDynamicRowBuilder rowBuilder(allocator);
                                    size32_t sz = deserializer->deserialize(rowBuilder, rowSource);
                                    OwnedConstThorRow fRow = rowBuilder.finalizeRowClear(sz);
                                    processRow(fRow);
                                }
                                while (!rowSource.eos());
                            }
                        }
                        node++;
                    }
                }
                catch (CATCHALL)
                {
                    close(*partDesc, partCrc, true);
                    throw;
                }
                close(*partDesc, partCrc, true);
                doStopInput();
            }
            else
            {
                CMessageBuffer mb;
                CMemoryRowSerializer mbs(mb);
                Linked<IOutputRowSerializer> serializer = ::queryRowSerializer(input);
                loop
                {
                    BooleanOnOff tf(receivingTag2);
                    if (queryJobChannel().queryJobComm().recv(mb, 1, mpTag2)) // node 1 asking for more..
                    {
                        if (abortSoon) break;
                        mb.clear();
                        do
                        {
                            OwnedConstThorRow row = inputStream->ungroupedNextRow();
                            if (!row) break;
                            serializer->serialize(mbs, (const byte *)row.get());
                        } while (mb.length() < SINGLEPART_KEY_TRANSFER_SIZE); // NB: at least one row
                        if (!queryJobChannel().queryJobComm().reply(mb))
                            throw MakeThorException(0, "Failed to send index data to node 1, from node %d", node);
                        if (0 == mb.length())
                            break;
                    }
                }
            }
        }
    /**
     * Initialises the slave from the serialized data sent to it.
     *
     * Fields are consumed from 'data' in this order: two MP tags, the 'active'
     * flag (followed by the logical filename and part descriptor when set),
     * then singlePartKey and refactor. Unless this is a single part key,
     * buildTlk follows, and on the first node either the descriptor of the TLK
     * to build or (when not local) the descriptor and copy locations of an
     * existing TLK.
     *
     * Also works out maxDiskRecordSize from the helper's disk record meta.
     *
     * @param data       serialized initialisation data
     * @param slaveData  not used by this function
     *
     * Throws TE_FileNotFound if an existing TLK is expected but none of its
     * listed copies exists.
     */
    void init(MemoryBuffer &data, MemoryBuffer &slaveData)
    {
        isLocal = 0 != (TIWlocal & helper->getFlags());

        mpTag = container.queryJob().deserializeMPTag(data);
        mpTag2 = container.queryJob().deserializeMPTag(data);

        data.read(active);
        if (active)
        {
            data.read(logicalFilename);
            partDesc.setown(deserializePartFileDescriptor(data));
        }

        data.read(singlePartKey);
        data.read(refactor);
        if (!singlePartKey)
        {
            data.read(buildTlk);
            if (firstNode())
            {
                if (buildTlk)
                {
                    tlkDesc.setown(deserializePartFileDescriptor(data));
                }
                else if (!isLocal) // exising tlk then..
                {
                    OwnedRoxieString diName(helper->getDistributeIndexName());
                    assertex(diName.get());
                    tlkDesc.setown(deserializePartFileDescriptor(data));

                    unsigned numLocations;
                    data.read(numLocations);
                    for (unsigned loc=0; loc<numLocations; loc++)
                    {
                        // Every location must be consumed from the buffer, even once a copy has been found.
                        RemoteFilename rf;
                        rf.deserialize(data);
                        if (existingTlkIFile)
                            continue;
                        Owned<IFile> candidate = createIFile(rf);
                        if (candidate->exists())
                            existingTlkIFile.set(candidate);
                    }
                    if (!existingTlkIFile)
                        throw MakeThorException(TE_FileNotFound, "Top level key part does not exist, for key: %s", diName.get());
                }
            }
        }
        else
            buildTlk = false;

        IOutputMetaData * diskSize = helper->queryDiskRecordSize();
        assertex(!(diskSize->getMetaFlags() & MDFneedserializedisk));
        if (!diskSize->isVariableSize())
            maxDiskRecordSize = diskSize->getFixedSize();
        else if (TIWmaxlength & helper->getFlags())
            maxDiskRecordSize = helper->getMaxKeySize();
        else
            maxDiskRecordSize = KEYBUILD_MAXLENGTH; //Current default behaviour, could be improved in the future
        reportOverflow = false;
    }
Example #13
0
 /**
  * Starts the sort: starts the input, gathers all of its rows into the
  * sorter, waits on the barrier and then opens the merged output stream.
  *
  * Any exception raised while starting the input or gathering is fired via
  * fireException(), the barrier is cancelled and the exception is rethrown.
  * If abortSoon is set after the gather, the barrier is cancelled and the
  * function returns without producing an output stream.
  *
  * Throws TE_BarrierAborted if the barrier wait fails.
  */
 void start()
 {
     ActivityTimer s(totalCycles, timeActivities);
     input = inputs.item(0);

     // Starting the input has its own try block, separate from the gather
     // below. If it were nested inside the gather's try block, the rethrow
     // would be caught a second time and the same failure fired twice.
     try
     {
         startInput(input);
     }
     catch (IException *e)
     {
         fireException(e);
         barrier->cancel();
         throw;
     }
     catch (CATCHALL)
     {
         Owned<IException> e = MakeActivityException(this, 0, "Unknown exception starting sort input");
         fireException(e);
         barrier->cancel();
         throw;
     }

     try
     {
         dataLinkStart();

         Linked<IRowInterfaces> rowif = queryRowInterfaces(input);
         Owned<IRowInterfaces> auxrowif = createRowInterfaces(helper->querySortedRecordSize(),queryActivityId(),queryCodeContext());
         sorter->Gather(
             rowif,
             input,
             helper->queryCompare(),
             helper->queryCompareLeftRight(),
             NULL,helper->querySerialize(),
             NULL,
             false,
             isUnstable(),
             abortSoon,
             auxrowif);
         stopInput(input);
         input = NULL;
         if (abortSoon)
         {
             ActPrintLogEx(&queryContainer(), thorlog_null, MCwarning, "MSortSlaveActivity::start aborting");
             barrier->cancel();
             return;
         }
     }
     catch (IException *e)
     {
         fireException(e);
         barrier->cancel();
         throw;
     }
     catch (CATCHALL)
     {
         Owned<IException> e = MakeActivityException(this, 0, "Unknown exception gathering sort input");
         fireException(e);
         barrier->cancel();
         throw;
     }

     ActPrintLog("SORT waiting barrier.1");
     if (!barrier->wait(false)) {
         Sleep(1000); // let original error through
         throw MakeThorException(TE_BarrierAborted,"SORT: Barrier Aborted");
     }
     ActPrintLog("SORT barrier.1 raised");
     output.setown(sorter->startMerge(totalrows));
 }
Example #14
0
void CWriteMasterBase::publish()
{
    if (published) return;
    published = true;
    if (!(diskHelperBase->getFlags() & (TDXtemporary|TDXjobtemp)))
        updateActivityResult(container.queryJob().queryWorkUnit(), diskHelperBase->getFlags(), diskHelperBase->getSequence(), fileName, recordsProcessed);

    IPropertyTree &props = fileDesc->queryProperties();
    props.setPropInt64("@recordCount", recordsProcessed);
    if (0 == (diskHelperBase->getFlags() & TDXtemporary) || container.queryJob().queryUseCheckpoints())
    {
        if (0 != (diskHelperBase->getFlags() & TDWexpires))
            setExpiryTime(props, diskHelperBase->getExpiryDays());
        if (TDWupdate & diskHelperBase->getFlags())
        {
            unsigned eclCRC;
            unsigned __int64 totalCRC;
            diskHelperBase->getUpdateCRCs(eclCRC, totalCRC);
            props.setPropInt("@eclCRC", eclCRC);
            props.setPropInt64("@totalCRC", totalCRC);
        }
    }
    container.queryTempHandler()->registerFile(fileName, container.queryOwner().queryGraphId(), diskHelperBase->getTempUsageCount(), TDXtemporary & diskHelperBase->getFlags(), getDiskOutputKind(diskHelperBase->getFlags()), &clusters);
    if (!dlfn.isExternal())
    {
        bool temporary = 0 != (diskHelperBase->getFlags()&TDXtemporary);
        if (!temporary && (queryJob().querySlaves() < fileDesc->numParts()))
        {
            // create empty parts for a fileDesc being published that is larger than this clusters
            size32_t recordSize = 0;
            IOutputMetaData *diskRowMeta = diskHelperBase->queryDiskRecordSize()->querySerializedDiskMeta();
            if (diskRowMeta->isFixedSize() && (TAKdiskwrite == container.getKind()))
            {
                recordSize = diskRowMeta->getMinRecordSize();
                if (0 != (diskHelperBase->getFlags() & TDXgrouped))
                    recordSize += 1;
            }
            unsigned compMethod = COMPRESS_METHOD_LZW;
            // rowdiff used if recordSize > 0, else fallback to compMethod
            if (getOptBool(THOROPT_COMP_FORCELZW, false))
            {
                recordSize = 0; // by default if fixed length (recordSize set), row diff compression is used. This forces compMethod.
                compMethod = COMPRESS_METHOD_LZW;
            }
            else if (getOptBool(THOROPT_COMP_FORCEFLZ, false))
                compMethod = COMPRESS_METHOD_FASTLZ;
            else if (getOptBool(THOROPT_COMP_FORCELZ4, false))
                compMethod = COMPRESS_METHOD_LZ4;
            bool blockCompressed;
            bool compressed = fileDesc->isCompressed(&blockCompressed);
            for (unsigned clusterIdx=0; clusterIdx<fileDesc->numClusters(); clusterIdx++)
            {
                StringBuffer clusterName;
                fileDesc->getClusterGroupName(clusterIdx, clusterName, &queryNamedGroupStore());
                PROGLOG("Creating blank parts for file '%s', cluster '%s'", fileName.get(), clusterName.str());
                unsigned p=0;
                while (p<fileDesc->numParts())
                {
                    if (p == targetOffset)
                        p += queryJob().querySlaves();
                    IPartDescriptor *partDesc = fileDesc->queryPart(p);
                    CDateTime createTime, modifiedTime;
                    for (unsigned c=0; c<partDesc->numCopies(); c++)
                    {
                        RemoteFilename rfn;
                        partDesc->getFilename(c, rfn);
                        StringBuffer path;
                        rfn.getPath(path);
                        try
                        {
                            ensureDirectoryForFile(path.str());
                            OwnedIFile iFile = createIFile(path.str());
                            Owned<IFileIO> iFileIO;
                            if (compressed)
                                iFileIO.setown(createCompressedFileWriter(iFile, recordSize, false, true, NULL, compMethod));
                            else
                                iFileIO.setown(iFile->open(IFOcreate));
                            dbgassertex(iFileIO.get());
                            iFileIO.clear();
                            // ensure copies have matching datestamps, as they would do normally (backupnode expects it)
                            if (partDesc->numCopies() > 1)
                            {
                                if (0 == c)
                                    iFile->getTime(&createTime, &modifiedTime, NULL);
                                else
                                    iFile->setTime(&createTime, &modifiedTime, NULL);
                            }
                        }
                        catch (IException *e)
                        {
                            if (0 == c)
                                throw;
                            Owned<IThorException> e2 = MakeThorException(e);
                            e->Release();
                            e2->setAction(tea_warning);
                            queryJob().fireException(e2);
                        }
                    }
                    partDesc->queryProperties().setPropInt64("@size", 0);
                    p++;
                }
                clusterIdx++;
            }
        }
        queryThorFileManager().publish(container.queryJob(), fileName, *fileDesc, NULL);
    }
}
Example #15
0
    virtual void init()
    {
        CMasterActivity::init();
        OwnedRoxieString indexFileName(helper->getIndexFileName());
        Owned<IDistributedFile> dataFile;
        Owned<IDistributedFile> indexFile = queryThorFileManager().lookup(container.queryJob(), indexFileName, false, 0 != (helper->getJoinFlags() & JFindexoptional), true);

        unsigned keyReadWidth = (unsigned)container.queryJob().getWorkUnitValueInt("KJKRR", 0);
        if (!keyReadWidth || keyReadWidth>container.queryJob().querySlaves())
            keyReadWidth = container.queryJob().querySlaves();
        

        initMb.clear();
        initMb.append(indexFileName.get());
        if (helper->diskAccessRequired())
            numTags += 2;
        initMb.append(numTags);
        unsigned t=0;
        for (; t<numTags; t++)
        {
            tags[t] = container.queryJob().allocateMPTag();
            initMb.append(tags[t]);
        }
        bool keyHasTlk = false;
        if (indexFile)
        {
            unsigned numParts = 0;
            localKey = indexFile->queryAttributes().getPropBool("@local");

            if (container.queryLocalData() && !localKey)
                throw MakeActivityException(this, 0, "Keyed Join cannot be LOCAL unless supplied index is local");

            checkFormatCrc(this, indexFile, helper->getIndexFormatCrc(), true);
            Owned<IFileDescriptor> indexFileDesc = indexFile->getFileDescriptor();
            IDistributedSuperFile *superIndex = indexFile->querySuperFile();
            unsigned superIndexWidth = 0;
            unsigned numSuperIndexSubs = 0;
            if (superIndex)
            {
                numSuperIndexSubs = superIndex->numSubFiles(true);
                bool first=true;
                // consistency check
                Owned<IDistributedFileIterator> iter = superIndex->getSubFileIterator(true);
                ForEach(*iter)
                {
                    IDistributedFile &f = iter->query();
                    unsigned np = f.numParts()-1;
                    IDistributedFilePart &part = f.queryPart(np);
                    const char *kind = part.queryAttributes().queryProp("@kind");
                    bool hasTlk = NULL != kind && 0 == stricmp("topLevelKey", kind); // if last part not tlk, then deemed local (might be singlePartKey)
                    if (first)
                    {
                        first = false;
                        keyHasTlk = hasTlk;
                        superIndexWidth = f.numParts();
                        if (keyHasTlk)
                            --superIndexWidth;
                    }
                    else
                    {
                        if (hasTlk != keyHasTlk)
                            throw MakeActivityException(this, 0, "Local/Single part keys cannot be mixed with distributed(tlk) keys in keyedjoin");
                        if (keyHasTlk && superIndexWidth != f.numParts()-1)
                            throw MakeActivityException(this, 0, "Super sub keys of different width cannot be mixed with distributed(tlk) keys in keyedjoin");
                    }
                }
                if (keyHasTlk)
                    numParts = superIndexWidth * numSuperIndexSubs;
                else
                    numParts = superIndex->numParts();
            }
            else
            {
                numParts = indexFile->numParts();
                if (numParts)
                {
                    const char *kind = indexFile->queryPart(indexFile->numParts()-1).queryAttributes().queryProp("@kind");
                    keyHasTlk = NULL != kind && 0 == stricmp("topLevelKey", kind);
                    if (keyHasTlk)
                        --numParts;
                }
            }
            if (numParts)
            {
                initMb.append(numParts);
                initMb.append(superIndexWidth); // 0 if not superIndex
                initMb.append((superIndex && superIndex->isInterleaved()) ? numSuperIndexSubs : 0);
                unsigned p=0;
                UnsignedArray parts;
                for (; p<numParts; p++)
                    parts.append(p);
                indexFileDesc->serializeParts(initMb, parts);
                if (localKey)
                    keyHasTlk = false; // not used
                initMb.append(keyHasTlk);
                if (keyHasTlk)
                {
                    if (numSuperIndexSubs)
                        initMb.append(numSuperIndexSubs);
                    else
                        initMb.append((unsigned)1);

                    Owned<IDistributedFileIterator> iter;
                    IDistributedFile *f;
                    if (superIndex)
                    {
                        iter.setown(superIndex->getSubFileIterator(true));
                        f = &iter->query();
                    }
                    else
                        f = indexFile;
                    loop
                    {
                        unsigned location;
                        OwnedIFile iFile;
                        StringBuffer filePath;
                        Owned<IFileDescriptor> fileDesc = f->getFileDescriptor();
                        Owned<IPartDescriptor> tlkDesc = fileDesc->getPart(fileDesc->numParts()-1);
                        if (!getBestFilePart(this, *tlkDesc, iFile, location, filePath))
                            throw MakeThorException(TE_FileNotFound, "Top level key part does not exist, for key: %s", f->queryLogicalName());
                        OwnedIFileIO iFileIO = iFile->open(IFOread);
                        assertex(iFileIO);

                        size32_t tlkSz = (size32_t)iFileIO->size();
                        initMb.append(tlkSz);
                        ::read(iFileIO, 0, tlkSz, initMb);

                        if (!iter || !iter->next())
                            break;
                        f = &iter->query();
                    }
                }
                if (helper->diskAccessRequired())
                {
                    OwnedRoxieString fetchFilename(helper->getFileName());
                    if (fetchFilename)
                    {
                        dataFile.setown(queryThorFileManager().lookup(container.queryJob(), fetchFilename, false, 0 != (helper->getFetchFlags() & FFdatafileoptional), true));
                        if (dataFile)
                        {
                            if (superIndex)
                                throw MakeActivityException(this, 0, "Superkeys and full keyed joins are not supported");
                            Owned<IFileDescriptor> dataFileDesc = getConfiguredFileDescriptor(*dataFile);
                            void *ekey;
                            size32_t ekeylen;
                            helper->getFileEncryptKey(ekeylen,ekey);
                            bool encrypted = dataFileDesc->queryProperties().getPropBool("@encrypted");
                            if (0 != ekeylen)
                            {
                                memset(ekey,0,ekeylen);
                                free(ekey);
                                if (!encrypted)
                                {
                                    Owned<IException> e = MakeActivityWarning(&container, TE_EncryptionMismatch, "Ignoring encryption key provided as file '%s' was not published as encrypted", helper->getFileName());
                                    container.queryJob().fireException(e);
                                }
                            }
                            else if (encrypted)
                                throw MakeActivityException(this, 0, "File '%s' was published as encrypted but no encryption key provided", fetchFilename.get());
                            unsigned dataReadWidth = (unsigned)container.queryJob().getWorkUnitValueInt("KJDRR", 0);
                            if (!dataReadWidth || dataReadWidth>container.queryJob().querySlaves())
                                dataReadWidth = container.queryJob().querySlaves();
                            Owned<IGroup> grp = container.queryJob().querySlaveGroup().subset((unsigned)0, dataReadWidth);
                            dataFileMapping.setown(getFileSlaveMaps(dataFile->queryLogicalName(), *dataFileDesc, container.queryJob().queryUserDescriptor(), *grp, false, false, NULL));
                            dataFileMapping->serializeFileOffsetMap(offsetMapMb.clear());
                        }
                        else
                            indexFile.clear();
                    }
                }
            }
            else
Example #16
0
    /**
     * Decides which parts of 'index' need a lookup and, for sorted reads of a super key,
     * checks that the top level keys (TLKs) of its sub keys carry matching crcs.
     *
     * One entry per part of 'index' is appended to performPartLookup: all true when
     * 'nofilter' is set, otherwise all false and then flipped to true for every part
     * that a TLK lookup (driven by the helper's segment monitors) points at.
     *
     * TLK consistency is only checked when 'index' is a super file, the key is not local
     * and the helper flags include TIRsorted. The crc of each sub key's TLK part is taken
     * from the part crc, else the "@crc" attribute, else the "@tlkCrc" attribute. If sub
     * keys mix crc kinds, or only some of them have a crc at all, a warning is fired and
     * the check is abandoned. If none of them has a crc, nothing is compared.
     *
     * @param index  the key, or super key, being read.
     * @throws activity exception if the TLK crcs of the sub keys differ, or if the sub keys
     *         of a super key have differing widths.
     * @throws thor exception (TE_FileNotFound) if no copy of a TLK part exists.
     */
    void prepareKey(IDistributedFile *index)
    {
        IDistributedFile *f = index;
        IDistributedSuperFile *super = f->querySuperFile();

        unsigned nparts = f->numParts(); // includes tlks if any, but unused in array
        performPartLookup.ensure(nparts);

        bool checkTLKConsistency = (nullptr != super) && !localKey && (0 != (TIRsorted & indexBaseHelper->getFlags()));
        if (nofilter)
        {
            while (nparts--) performPartLookup.append(true);
            if (!checkTLKConsistency) return;
        }
        else
        {
            while (nparts--) performPartLookup.append(false); // parts to perform lookup set later
        }

        Owned<IDistributedFileIterator> iter;
        if (super)
        {
            iter.setown(super->getSubFileIterator(true));
            verifyex(iter->first());
            f = &iter->query();
        }
        unsigned width = f->numParts();
        if (!localKey)
            --width;
        assertex(width);
        unsigned tlkCrc = 0;
        bool first = true;
        unsigned superSubIndex=0;
        bool fileCrc = false, rowCrc = false;
        bool missingCrc = false; // set once any TLK part is found to have no crc of any kind
        for (;;)
        {
            Owned<IDistributedFilePart> part = f->getPart(width);
            if (checkTLKConsistency)
            {
                // Initialised, and only used when haveCrc is true, so that a part without any
                // crc never feeds an indeterminate value into the comparison below.
                unsigned _tlkCrc = 0;
                bool haveCrc = true;
                if (part->getCrc(_tlkCrc))
                    fileCrc = true;
                else if (part->queryAttributes().hasProp("@crc")) // NB: key "@crc" is not a crc on the file, but data within.
                {
                    _tlkCrc = part->queryAttributes().getPropInt("@crc");
                    rowCrc = true;
                }
                else if (part->queryAttributes().hasProp("@tlkCrc")) // backward compat.
                {
                    _tlkCrc = part->queryAttributes().getPropInt("@tlkCrc");
                    rowCrc = true;
                }
                else
                {
                    haveCrc = false;
                    missingCrc = true;
                }

                if (missingCrc && (rowCrc || fileCrc))
                {
                    // Some sub keys have a crc and some do not (in either order): cannot validate.
                    checkTLKConsistency = false;
                    Owned<IException> e = MakeActivityWarning(&container, 0, "Cannot validate that tlks in superfile %s match, some crc attributes are missing", super->queryLogicalName());
                    queryJobChannel().fireException(e);
                }
                else if (rowCrc && fileCrc)
                {
                    // File crcs and row crcs are not comparable with each other.
                    checkTLKConsistency = false;
                    Owned<IException> e = MakeActivityWarning(&container, 0, "Cannot validate that tlks in superfile %s match, due to mixed crc types.", super->queryLogicalName());
                    queryJobChannel().fireException(e);
                }
                if (checkTLKConsistency && haveCrc)
                {
                    if (first)
                    {
                        tlkCrc = _tlkCrc;
                        first = false;
                    }
                    else if (tlkCrc != _tlkCrc)
                        throw MakeActivityException(this, 0, "Sorted output on super files comprising of non coparitioned sub keys is not supported (TLK's do not match)");
                }
            }
            if (!nofilter)
            {
                // Open the first copy of the TLK part that exists.
                Owned<IKeyIndex> keyIndex;
                unsigned copy;
                for (copy=0; copy<part->numCopies(); copy++)
                {
                    RemoteFilename rfn;
                    OwnedIFile ifile = createIFile(part->getFilename(rfn,copy));
                    if (ifile->exists())
                    {
                        StringBuffer remotePath;
                        rfn.getRemotePath(remotePath);
                        unsigned crc = 0;
                        part->getCrc(crc);
                        keyIndex.setown(createKeyIndex(remotePath.str(), crc, false, false));
                        break;
                    }
                }
                if (!keyIndex)
                    throw MakeThorException(TE_FileNotFound, "Top level key part does not exist, for key: %s", index->queryLogicalName());

                unsigned fixedSize = indexBaseHelper->queryDiskRecordSize()->querySerializedDiskMeta()->getFixedSize(); // used only if fixed
                Owned <IKeyManager> tlk = createLocalKeyManager(keyIndex, fixedSize, NULL);
                indexBaseHelper->createSegmentMonitors(tlk);
                tlk->finishSegmentMonitors();
                tlk->reset();
                while (tlk->lookup(false))
                {
                    // A non-zero fpos is used as a 1-based part number; for a super key the
                    // flag array is indexed by (part * number of sub files + sub file index).
                    if (tlk->queryFpos())
                        performPartLookup.replace(true, (aindex_t)(super?super->numSubFiles(true)*(tlk->queryFpos()-1)+superSubIndex:tlk->queryFpos()-1));
                }
            }
            if (!super||!iter->next())
                break;
            superSubIndex++;
            f = &iter->query();
            if (width != f->numParts()-1)
                throw MakeActivityException(this, 0, "Super key %s, with mixture of sub key width are not supported.", f->queryLogicalName());
        }
    }
    virtual void process() override
    {
        ActPrintLog("INDEXWRITE: Start");
        init();

        IRowStream *stream = inputStream;
        ThorDataLinkMetaInfo info;
        input->getMetaInfo(info);
        outRowAllocator.setown(getRowAllocator(helper->queryDiskRecordSize()));
        start();
        if (refactor)
        {
            assertex(isLocal);
            if (active)
            {
                unsigned targetWidth = partDesc->queryOwner().numParts()-(buildTlk?1:0);
                assertex(0 == container.queryJob().querySlaves() % targetWidth);
                unsigned partsPerNode = container.queryJob().querySlaves() / targetWidth;
                unsigned myPart = queryJobChannel().queryMyRank();

                IArrayOf<IRowStream> streams;
                streams.append(*LINK(stream));
                --partsPerNode;

 // Should this be merging 1,11,21,31 etc.
                unsigned p=0;
                unsigned fromPart = targetWidth+1 + (partsPerNode * (myPart-1));
                for (; p<partsPerNode; p++)
                {
                    streams.append(*createRowStreamFromNode(*this, fromPart++, queryJobChannel().queryJobComm(), mpTag, abortSoon));
                }
                ICompare *icompare = helper->queryCompare();
                assertex(icompare);
                Owned<IRowLinkCounter> linkCounter = new CThorRowLinkCounter;
                myInputStream.setown(createRowStreamMerger(streams.ordinality(), streams.getArray(), icompare, false, linkCounter));
                stream = myInputStream;
            }
            else // serve nodes, creating merged parts
                rowServer.setown(createRowServer(this, stream, queryJobChannel().queryJobComm(), mpTag));
        }
        processed = THORDATALINK_STARTED;

        // single part key support
        // has to serially pull all data fron nodes 2-N
        // nodes 2-N, could/should start pushing some data (as it's supposed to be small) to cut down on serial nature.
        unsigned node = queryJobChannel().queryMyRank();
        if (singlePartKey)
        {
            if (1 == node)
            {
                try
                {
                    open(*partDesc, false, helper->queryDiskRecordSize()->isVariableSize());
                    for (;;)
                    {
                        OwnedConstThorRow row = inputStream->ungroupedNextRow();
                        if (!row)
                            break;
                        if (abortSoon) return;
                        processRow(row);
                    }

                    unsigned node = 2;
                    while (node <= container.queryJob().querySlaves())
                    {
                        Linked<IOutputRowDeserializer> deserializer = ::queryRowDeserializer(input);
                        CMessageBuffer mb;
                        Owned<ISerialStream> stream = createMemoryBufferSerialStream(mb);
                        CThorStreamDeserializerSource rowSource;
                        rowSource.setStream(stream);
                        bool successSR;
                        for (;;)
                        {
                            {
                                BooleanOnOff tf(receivingTag2);
                                successSR = queryJobChannel().queryJobComm().sendRecv(mb, node, mpTag2);
                            }
                            if (successSR)
                            {
                                if (rowSource.eos())
                                    break;
                                Linked<IEngineRowAllocator> allocator = ::queryRowAllocator(input);
                                do
                                {
                                    RtlDynamicRowBuilder rowBuilder(allocator);
                                    size32_t sz = deserializer->deserialize(rowBuilder, rowSource);
                                    OwnedConstThorRow fRow = rowBuilder.finalizeRowClear(sz);
                                    processRow(fRow);
                                }
                                while (!rowSource.eos());
                            }
                        }
                        node++;
                    }
                }
                catch (CATCHALL)
                {
                    close(*partDesc, partCrc, true);
                    throw;
                }
                close(*partDesc, partCrc, true);
                stop();
            }
            else
            {
                CMessageBuffer mb;
                CMemoryRowSerializer mbs(mb);
                Linked<IOutputRowSerializer> serializer = ::queryRowSerializer(input);
                for (;;)
                {
                    BooleanOnOff tf(receivingTag2);
                    if (queryJobChannel().queryJobComm().recv(mb, 1, mpTag2)) // node 1 asking for more..
                    {
                        if (abortSoon) break;
                        mb.clear();
                        do
                        {
                            OwnedConstThorRow row = inputStream->ungroupedNextRow();
                            if (!row) break;
                            serializer->serialize(mbs, (const byte *)row.get());
                        } while (mb.length() < SINGLEPART_KEY_TRANSFER_SIZE); // NB: at least one row
                        if (!queryJobChannel().queryJobComm().reply(mb))
                            throw MakeThorException(0, "Failed to send index data to node 1, from node %d", node);
                        if (0 == mb.length())
                            break;
                    }
                }
            }
        }
        else
        {
            if (!refactor || active)
            {
                try
                {
                    StringBuffer partFname;
                    getPartFilename(*partDesc, 0, partFname);
                    ActPrintLog("INDEXWRITE: process: handling fname : %s", partFname.str());
                    open(*partDesc, false, helper->queryDiskRecordSize()->isVariableSize());
                    ActPrintLog("INDEXWRITE: write");

                    BooleanOnOff tf(receiving);
                    if (!refactor || !active)
                        receiving = false;
                    do
                    {
                        OwnedConstThorRow row = inputStream->ungroupedNextRow();
                        if (!row)
                            break;
                        processRow(row);
                    } while (!abortSoon);
                    ActPrintLog("INDEXWRITE: write level 0 complete");
                }
                catch (CATCHALL)
                {
                    close(*partDesc, partCrc, isLocal && !buildTlk && 1 == node);
                    throw;
                }
                close(*partDesc, partCrc, isLocal && !buildTlk && 1 == node);
                stop();

                ActPrintLog("INDEXWRITE: Wrote %" RCPF "d records", processed & THORDATALINK_COUNT_MASK);

                if (buildTlk)
                {
                    ActPrintLog("INDEXWRITE: sending rows");
                    NodeInfoArray tlkRows;

                    CMessageBuffer msg;
                    if (firstNode())
                    {
                        if (processed & THORDATALINK_COUNT_MASK)
                        {
                            if (enableTlkPart0)
                                tlkRows.append(* new CNodeInfo(0, firstRow.get(), firstRowSize, totalCount));
                            tlkRows.append(* new CNodeInfo(1, lastRow.get(), lastRowSize, totalCount));
                        }
                    }
                    else
                    {
                        if (processed & THORDATALINK_COUNT_MASK)
                        {
                            CNodeInfo row(queryJobChannel().queryMyRank(), lastRow.get(), lastRowSize, totalCount);
                            row.serialize(msg);
                        }
                        queryJobChannel().queryJobComm().send(msg, 1, mpTag);
                    }

                    if (firstNode())
                    {
                        ActPrintLog("INDEXWRITE: Waiting on tlk to complete");

                        // JCSMORE if refactor==true, is rowsToReceive here right??
                        unsigned rowsToReceive = (refactor ? (tlkDesc->queryOwner().numParts()-1) : container.queryJob().querySlaves()) -1; // -1 'cos got my own in array already
                        ActPrintLog("INDEXWRITE: will wait for info from %d slaves before writing TLK", rowsToReceive);
                        while (rowsToReceive--)
                        {
                            msg.clear();
                            receiveMsg(msg, RANK_ALL, mpTag); // NH->JCS RANK_ALL_OTHER not supported for recv
                            if (abortSoon)
                                return;
                            if (msg.length())
                            {
                                CNodeInfo *ni = new CNodeInfo();
                                ni->deserialize(msg);
                                tlkRows.append(*ni);
                            }
                        }
                        tlkRows.sort(CNodeInfo::compare);

                        StringBuffer path;
                        getPartFilename(*tlkDesc, 0, path);
                        ActPrintLog("INDEXWRITE: creating toplevel key file : %s", path.str());
                        try
                        {
                            open(*tlkDesc, true, helper->queryDiskRecordSize()->isVariableSize());
                            if (tlkRows.length())
                            {
                                CNodeInfo &lastNode = tlkRows.item(tlkRows.length()-1);
                                memset(lastNode.value, 0xff, lastNode.size);
                            }
                            ForEachItemIn(idx, tlkRows)
                            {
                                CNodeInfo &info = tlkRows.item(idx);
                                builder->processKeyData((char *)info.value, info.pos, info.size);
                            }
                            close(*tlkDesc, tlkCrc, true);
                        }
                        catch (CATCHALL)
                        {
                            abortSoon = true;
                            close(*tlkDesc, tlkCrc, true);
                            removeFiles(*partDesc);
                            throw;
                        }
                    }
                }
                else if (!isLocal && firstNode())