Exemple #1
0
    /**
     * Master-side initialisation of the sort activity.
     *
     * Runs the base-class init, then:
     *  - reports an exception to the workunit if the helper names a sort
     *    algorithm other than "quicksort" (the request is ignored);
     *  - if the helper supplies a non-empty cosort logical filename, looks the
     *    file up, passes it to addReadFile() and appends the copy-0 filename of
     *    every part to 'cosortfilenames', separated by '|'.
     */
    virtual void init()
    {
        CSortBaseActivityMaster::init();
        IHThorSortArg *helper = (IHThorSortArg *)queryHelper();

        // selectInterface() may not provide the algorithm interface; treat that as "no algorithm named"
        IHThorAlgorithm *algo = static_cast<IHThorAlgorithm *>(helper->selectInterface(TAIalgorithm_1));
        OwnedRoxieString algoname(algo ? algo->getAlgorithm() : NULL);
        if (algoname && (0 != stricmp(algoname, "quicksort")))
        {
            Owned<IException> e = MakeActivityException(this, 0, "Ignoring, unsupported sort order algorithm '%s'", algoname.get());
            reportExceptionToWorkunit(container.queryJob().queryWorkUnit(), e);
        }

        OwnedRoxieString cosortlogname(helper->getSortedFilename());
        if (cosortlogname && *cosortlogname)
        {
            Owned<IDistributedFile> coSortFile = queryThorFileManager().lookup(container.queryJob(), cosortlogname);
            addReadFile(coSortFile);
            Owned<IFileDescriptor> fileDesc = coSortFile->getFileDescriptor();
            for (unsigned partNo=0; partNo<fileDesc->numParts(); partNo++)
            {
                Owned<IPartDescriptor> partDesc = fileDesc->getPart(partNo);
                if (cosortfilenames.length())
                    cosortfilenames.append("|");

                // JCSMORE - picking the primary here, means no automatic use of backup copy, could use RMF's possibly.
                getPartFilename(*partDesc, 0, cosortfilenames);
            }
        }
    }
 /**
  * Best-effort removal of the copy-0 physical file of 'partDesc'.
  * Failures are logged and swallowed; nothing is thrown to the caller.
  */
 void removeFiles(IPartDescriptor &partDesc)
 {
     StringBuffer primaryPath;
     getPartFilename(partDesc, 0, primaryPath);
     Owned<IFile> primary = createIFile(primaryPath.str());
     try
     {
         primary->remove();
     }
     catch (IException *removeErr)
     {
         // Log with the exception detail, then drop our reference to it
         ActPrintLog(removeErr, "Failed to remove file: %s", primaryPath.str());
         removeErr->Release();
     }
     catch (CATCHALL)
     {
         ActPrintLog("Failed to remove: %s", primaryPath.str());
     }
 }
void CDiskWriteSlaveActivityBase::process()
{
    calcFileCrc = false;
    uncompressedBytesWritten = 0;
    replicateDone = 0;
    StringBuffer tmpStr;
    fName.set(getPartFilename(*partDesc, 0, tmpStr).str());
    if (diskHelperBase->getFlags() & TDXtemporary && !container.queryJob().queryUseCheckpoints())
        container.queryTempHandler()->registerFile(fName, container.queryOwner().queryGraphId(), usageCount, true);
    try
    {
        ActPrintLog("handling fname : %s", fName.get());

        try
        {
            open();
            assertex(out||outraw);
            write();
        }
        catch (IException *)
        {
            abortSoon = true;
            try { close(); }
            catch (IException *e)
            {
                EXCLOG(e, "close()");
                e->Release();
            }
            throw;
        }
        catch (CATCHALL)
        {
            abortSoon = true;
            try { close(); }
            catch (IException *e)
            {
                EXCLOG(e, "close()");
                e->Release();
            }
            throw;
        }
        close();
    }
    catch (IException *)
    {
        calcFileCrc = false;
        throw;
    }
    catch(CATCHALL)
    {
        calcFileCrc = false;
        throw;
    }
    unsigned crc = compress?~0:fileCRC.get();
    ActPrintLog("Wrote %" RCPF "d records%s", processed & THORDATALINK_COUNT_MASK, calcFileCrc?StringBuffer(", crc=0x").appendf("%X", crc).str() : "");
}
 /**
  * Finishes the current key builder (if any) and releases the builder and metadata.
  *
  * @param partDesc  part whose copy-0 filename is used in log messages and whose
  *                  files are removed if the activity is aborting
  * @param crc       passed to builder->finish() to receive the crc
  * @param addMeta   when true and metadata is present, the metadata is passed to finish()
  *
  * An exception raised by finish() sets 'abortSoon' and is held back until the
  * builder/metadata have been cleared and, if aborting, the part's files removed;
  * it is then rethrown. An IException raised while clearing is logged and released.
  */
 void close(IPartDescriptor &partDesc, unsigned &crc, bool addMeta=false)
 {
     StringBuffer fileName;
     getPartFilename(partDesc, 0, fileName);

     Owned<IException> deferred; // finish() failure, rethrown once cleanup is done
     try
     {
         if (builder)
         {
             if (!addMeta || !metadata)
                 builder->finish(&crc);
             else
                 builder->finish(metadata, &crc);
         }
     }
     catch (IException *finishErr)
     {
         ActPrintLog(finishErr, "Error closing file: %s", fileName.str());
         abortSoon = true;
         deferred.setown(finishErr);
     }
     catch (CATCHALL)
     {
         abortSoon = true;
         deferred.setown(MakeActivityException(this, 0, "INDEXWRITE: Error closing file: %s - unknown exception", fileName.str()));
     }

     try
     {
         metadata.clear();
         builder.clear();
     }
     catch (IException *clearErr)
     {
         ActPrintLog(clearErr, "Error closing file: %s", fileName.str());
         clearErr->Release();
     }

     if (abortSoon)
         removeFiles(partDesc);
     if (deferred)
         throw LINK(deferred);
 }
 /**
  * Creates the output stream for a part and sets up the key builder on it.
  *
  * @param partDesc    part to write
  * @param isTopLevel  when true the builder is created with a start value of 0,
  *                    otherwise with 'totalCount'
  * @param isVariable  adds HTREE_VARSIZE to the builder flags
  *
  * Builder flags are derived from the helper's flags and 'isLocal'; the node
  * size is read from the "_nodeSize" metadata property, defaulting to NODESIZE.
  */
 void open(IPartDescriptor &partDesc, bool isTopLevel, bool isVariable)
 {
     StringBuffer fileName;
     getPartFilename(partDesc, 0, fileName);

     bool compress = false;
     OwnedIFileIO partIO = createMultipleWrite(this, partDesc, 0, TW_RenameToPrimary, compress, NULL, this, &abortSoon);
     Owned<IFileIOStream> outStream = createBufferedIOStream(partIO);
     ActPrintLog("INDEXWRITE: created fixed output stream %s", fileName.str());

     // Compression flags: row compression takes precedence, otherwise LZW unless disabled
     unsigned keyFlags = COL_PREFIX;
     if (helper->getFlags() & TIWrowcompress)
         keyFlags |= HTREE_COMPRESSED_KEY|HTREE_QUICK_COMPRESSED_KEY;
     else if (0 == (helper->getFlags() & TIWnolzwcompress))
         keyFlags |= HTREE_COMPRESSED_KEY;
     if (!isLocal)
         keyFlags |= HTREE_FULLSORT_KEY;
     if (isVariable)
         keyFlags |= HTREE_VARSIZE;

     buildUserMetadata(metadata);
     buildLayoutMetadata(metadata);

     unsigned nodeSize = NODESIZE;
     if (metadata)
         nodeSize = metadata->getPropInt("_nodeSize", NODESIZE);

     builder.setown(createKeyBuilder(outStream, keyFlags, maxDiskRecordSize, nodeSize, helper->getKeyedSize(), isTopLevel ? 0 : totalCount));
 }
Exemple #6
0
    virtual void process()
    {
        ActPrintLog("process");

        CMasterActivity::process();

        IHThorSortArg *helper = (IHThorSortArg *)queryHelper();
        StringBuffer skewV;
        double skewError;
        container.queryJob().getWorkUnitValue("overrideSkewError", skewV);
        if (skewV.length())
            skewError = atof(skewV.str());
        else
        {
            skewError = helper->getSkew();
            if (!skewError)
            {
                container.queryJob().getWorkUnitValue("defaultSkewError", skewV.clear());
                if (skewV.length())
                    skewError = atof(skewV.str());
            }
        }
        container.queryJob().getWorkUnitValue("defaultSkewWarning", skewV.clear());
        double defaultSkewWarning = skewV.length() ? atof(skewV.str()) : 0;
        double skewWarning = defaultSkewWarning;
        unsigned __int64 skewThreshold = container.queryJob().getWorkUnitValueInt("overrideSkewThreshold", 0);
        if (!skewThreshold)
        {
            skewThreshold = helper->getThreshold();         
            if (!skewThreshold)
                skewThreshold = container.queryJob().getWorkUnitValueInt("defaultSkewThreshold", 0);
        }
        StringBuffer cosortfilenames;
        const char *cosortlogname = helper->getSortedFilename();
        if (cosortlogname&&*cosortlogname) {

            Owned<IDistributedFile> file = queryThorFileManager().lookup(container.queryJob(), cosortlogname);
            Owned<IFileDescriptor> fileDesc = file->getFileDescriptor();
            queryThorFileManager().noteFileRead(container.queryJob(), file);
            unsigned o;
            for (o=0; o<fileDesc->numParts(); o++)
            {
                Owned<IPartDescriptor> partDesc = fileDesc->getPart(o);
                if (cosortfilenames.length())
                    cosortfilenames.append("|");

                // JCSMORE - picking the primary here, means no automatic use of backup copy, could use RMF's possibly.
                getPartFilename(*partDesc, 0, cosortfilenames);
            }
        }

        Owned<IRowInterfaces> rowif = createRowInterfaces(container.queryInput(0)->queryHelper()->queryOutputMeta(),queryActivityId(),queryCodeContext());
        Owned<IRowInterfaces> auxrowif = createRowInterfaces(helper->querySortedRecordSize(),queryActivityId(),queryCodeContext());
        try {   
            imaster->SortSetup(rowif,helper->queryCompare(),helper->querySerialize(),cosortfilenames.length()!=0,true,cosortfilenames.toCharArray(),auxrowif);
            if (barrier->wait(false)) { // local sort complete
                size32_t maxdeviance=globals->getPropInt("@sort_max_deviance", 10*1024*1024);
                if (!imaster->Sort(skewThreshold,skewWarning,skewError,maxdeviance,true,false,false,(unsigned)globals->getPropInt("@smallSortThreshold"))) {
                    Owned<IThorException> e = MakeActivityException(this, TE_SortFailedSkewExceeded,"SORT failed, skew exceeded");
                    fireException(e);
                }
                barrier->wait(false); // merge complete
            }
            imaster->SortDone();
        }
        catch (IException *e)
        {
            ActPrintLog(e, "WARNING: exception during sort");
            throw;
        }
        ::Release(imaster);
        ActPrintLog("process exit");
    }
/**
 * Copies the copy-0 physical file of a part to each of its other copies.
 *
 * For every copy index from 1 upwards:
 *  - if copyClusterNum() reports a replicate copy (>0), the copy is handed to
 *    queryThor().queryBackup().backup(); an IException from that call is turned
 *    into an activity warning and fired on 'activity', and processing continues;
 *  - otherwise the destination is treated as another primary: the source is
 *    copied to "<dst>.tmp", any existing destination is removed, and the tmp
 *    file is renamed into place. On an IException the tmp file is removed
 *    (a failure of that removal is logged) and the exception is rethrown.
 *
 * @param activity   activity used for logging and for raising warnings/exceptions
 * @param partDesc   part whose copies are to be written
 * @param iProgress  progress callback passed through to ::copyFile
 */
static void _doReplicate(CActivityBase *activity, IPartDescriptor &partDesc, ICopyFileProgress *iProgress)
{
    StringBuffer primaryName;
    getPartFilename(partDesc, 0, primaryName);
    RemoteFilename rfn;
    const unsigned copies = partDesc.numCopies();
    for (unsigned c=1; c<copies; c++)
    {
        // Only the replicate-copy index is needed; the returned cluster number is not used here
        unsigned replicateCopy;
        partDesc.copyClusterNum(c, &replicateCopy);
        rfn.clear();
        partDesc.getFilename(c, rfn);
        StringBuffer dstName;
        rfn.getPath(dstName);
        assertex(dstName.length());

        if (replicateCopy>0)
        {
            try
            {
                queryThor().queryBackup().backup(dstName.str(), primaryName.str());
            }
            catch (IException *e)
            {
                // A failed replicate is reported as a warning rather than failing the activity
                Owned<IThorException> re = MakeActivityWarning(activity, e, "Failed to create replicate file '%s'", dstName.str());
                e->Release();
                activity->fireException(re);
            }
        }
        else // another primary
        {
            ActPrintLog(activity, "Copying to primary %s", dstName.str());
            StringBuffer tmpName(dstName.str());
            tmpName.append(".tmp");
            OwnedIFile tmpIFile = createIFile(tmpName.str());
            OwnedIFile srcFile = createIFile(primaryName.str());
            CFIPScope fipScope(tmpName.str());
            try
            {
                try
                {
                    ensureDirectoryForFile(dstName.str());
                    ::copyFile(tmpIFile, srcFile, 0x100000, iProgress);
                }
                catch (IException *e)
                {
                    IThorException *re = MakeActivityException(activity, e, "Failed to copy to tmp file '%s' from source file '%s'", tmpIFile->queryFilename(), srcFile->queryFilename());
                    e->Release();
                    throw re;
                }
                try
                {
                    // Replace any existing destination with the freshly copied tmp file
                    OwnedIFile dstIFile = createIFile(dstName.str());
                    dstIFile->remove();
                    tmpIFile->rename(pathTail(dstName.str()));
                }
                catch (IException *e)
                {
                    IThorException *re = ThorWrapException(e, "Failed to rename '%s' to '%s'", tmpName.str(), dstName.str());
                    e->Release();
                    throw re;
                }
            }
            catch (IException *)
            {
                // Do not leave a partial tmp file behind; the original failure is still propagated
                try { tmpIFile->remove(); }
                catch (IException *e) { ActPrintLog(&activity->queryContainer(), e, NULL); e->Release(); }
                throw;
            }
        }
    }
}
    virtual void process() override
    {
        ActPrintLog("INDEXWRITE: Start");
        init();

        IRowStream *stream = inputStream;
        ThorDataLinkMetaInfo info;
        input->getMetaInfo(info);
        outRowAllocator.setown(getRowAllocator(helper->queryDiskRecordSize()));
        start();
        if (refactor)
        {
            assertex(isLocal);
            if (active)
            {
                unsigned targetWidth = partDesc->queryOwner().numParts()-(buildTlk?1:0);
                assertex(0 == container.queryJob().querySlaves() % targetWidth);
                unsigned partsPerNode = container.queryJob().querySlaves() / targetWidth;
                unsigned myPart = queryJobChannel().queryMyRank();

                IArrayOf<IRowStream> streams;
                streams.append(*LINK(stream));
                --partsPerNode;

 // Should this be merging 1,11,21,31 etc.
                unsigned p=0;
                unsigned fromPart = targetWidth+1 + (partsPerNode * (myPart-1));
                for (; p<partsPerNode; p++)
                {
                    streams.append(*createRowStreamFromNode(*this, fromPart++, queryJobChannel().queryJobComm(), mpTag, abortSoon));
                }
                ICompare *icompare = helper->queryCompare();
                assertex(icompare);
                Owned<IRowLinkCounter> linkCounter = new CThorRowLinkCounter;
                myInputStream.setown(createRowStreamMerger(streams.ordinality(), streams.getArray(), icompare, false, linkCounter));
                stream = myInputStream;
            }
            else // serve nodes, creating merged parts
                rowServer.setown(createRowServer(this, stream, queryJobChannel().queryJobComm(), mpTag));
        }
        processed = THORDATALINK_STARTED;

        // single part key support
        // has to serially pull all data fron nodes 2-N
        // nodes 2-N, could/should start pushing some data (as it's supposed to be small) to cut down on serial nature.
        unsigned node = queryJobChannel().queryMyRank();
        if (singlePartKey)
        {
            if (1 == node)
            {
                try
                {
                    open(*partDesc, false, helper->queryDiskRecordSize()->isVariableSize());
                    for (;;)
                    {
                        OwnedConstThorRow row = inputStream->ungroupedNextRow();
                        if (!row)
                            break;
                        if (abortSoon) return;
                        processRow(row);
                    }

                    unsigned node = 2;
                    while (node <= container.queryJob().querySlaves())
                    {
                        Linked<IOutputRowDeserializer> deserializer = ::queryRowDeserializer(input);
                        CMessageBuffer mb;
                        Owned<ISerialStream> stream = createMemoryBufferSerialStream(mb);
                        CThorStreamDeserializerSource rowSource;
                        rowSource.setStream(stream);
                        bool successSR;
                        for (;;)
                        {
                            {
                                BooleanOnOff tf(receivingTag2);
                                successSR = queryJobChannel().queryJobComm().sendRecv(mb, node, mpTag2);
                            }
                            if (successSR)
                            {
                                if (rowSource.eos())
                                    break;
                                Linked<IEngineRowAllocator> allocator = ::queryRowAllocator(input);
                                do
                                {
                                    RtlDynamicRowBuilder rowBuilder(allocator);
                                    size32_t sz = deserializer->deserialize(rowBuilder, rowSource);
                                    OwnedConstThorRow fRow = rowBuilder.finalizeRowClear(sz);
                                    processRow(fRow);
                                }
                                while (!rowSource.eos());
                            }
                        }
                        node++;
                    }
                }
                catch (CATCHALL)
                {
                    close(*partDesc, partCrc, true);
                    throw;
                }
                close(*partDesc, partCrc, true);
                stop();
            }
            else
            {
                CMessageBuffer mb;
                CMemoryRowSerializer mbs(mb);
                Linked<IOutputRowSerializer> serializer = ::queryRowSerializer(input);
                for (;;)
                {
                    BooleanOnOff tf(receivingTag2);
                    if (queryJobChannel().queryJobComm().recv(mb, 1, mpTag2)) // node 1 asking for more..
                    {
                        if (abortSoon) break;
                        mb.clear();
                        do
                        {
                            OwnedConstThorRow row = inputStream->ungroupedNextRow();
                            if (!row) break;
                            serializer->serialize(mbs, (const byte *)row.get());
                        } while (mb.length() < SINGLEPART_KEY_TRANSFER_SIZE); // NB: at least one row
                        if (!queryJobChannel().queryJobComm().reply(mb))
                            throw MakeThorException(0, "Failed to send index data to node 1, from node %d", node);
                        if (0 == mb.length())
                            break;
                    }
                }
            }
        }
        else
        {
            if (!refactor || active)
            {
                try
                {
                    StringBuffer partFname;
                    getPartFilename(*partDesc, 0, partFname);
                    ActPrintLog("INDEXWRITE: process: handling fname : %s", partFname.str());
                    open(*partDesc, false, helper->queryDiskRecordSize()->isVariableSize());
                    ActPrintLog("INDEXWRITE: write");

                    BooleanOnOff tf(receiving);
                    if (!refactor || !active)
                        receiving = false;
                    do
                    {
                        OwnedConstThorRow row = inputStream->ungroupedNextRow();
                        if (!row)
                            break;
                        processRow(row);
                    } while (!abortSoon);
                    ActPrintLog("INDEXWRITE: write level 0 complete");
                }
                catch (CATCHALL)
                {
                    close(*partDesc, partCrc, isLocal && !buildTlk && 1 == node);
                    throw;
                }
                close(*partDesc, partCrc, isLocal && !buildTlk && 1 == node);
                stop();

                ActPrintLog("INDEXWRITE: Wrote %" RCPF "d records", processed & THORDATALINK_COUNT_MASK);

                if (buildTlk)
                {
                    ActPrintLog("INDEXWRITE: sending rows");
                    NodeInfoArray tlkRows;

                    CMessageBuffer msg;
                    if (firstNode())
                    {
                        if (processed & THORDATALINK_COUNT_MASK)
                        {
                            if (enableTlkPart0)
                                tlkRows.append(* new CNodeInfo(0, firstRow.get(), firstRowSize, totalCount));
                            tlkRows.append(* new CNodeInfo(1, lastRow.get(), lastRowSize, totalCount));
                        }
                    }
                    else
                    {
                        if (processed & THORDATALINK_COUNT_MASK)
                        {
                            CNodeInfo row(queryJobChannel().queryMyRank(), lastRow.get(), lastRowSize, totalCount);
                            row.serialize(msg);
                        }
                        queryJobChannel().queryJobComm().send(msg, 1, mpTag);
                    }

                    if (firstNode())
                    {
                        ActPrintLog("INDEXWRITE: Waiting on tlk to complete");

                        // JCSMORE if refactor==true, is rowsToReceive here right??
                        unsigned rowsToReceive = (refactor ? (tlkDesc->queryOwner().numParts()-1) : container.queryJob().querySlaves()) -1; // -1 'cos got my own in array already
                        ActPrintLog("INDEXWRITE: will wait for info from %d slaves before writing TLK", rowsToReceive);
                        while (rowsToReceive--)
                        {
                            msg.clear();
                            receiveMsg(msg, RANK_ALL, mpTag); // NH->JCS RANK_ALL_OTHER not supported for recv
                            if (abortSoon)
                                return;
                            if (msg.length())
                            {
                                CNodeInfo *ni = new CNodeInfo();
                                ni->deserialize(msg);
                                tlkRows.append(*ni);
                            }
                        }
                        tlkRows.sort(CNodeInfo::compare);

                        StringBuffer path;
                        getPartFilename(*tlkDesc, 0, path);
                        ActPrintLog("INDEXWRITE: creating toplevel key file : %s", path.str());
                        try
                        {
                            open(*tlkDesc, true, helper->queryDiskRecordSize()->isVariableSize());
                            if (tlkRows.length())
                            {
                                CNodeInfo &lastNode = tlkRows.item(tlkRows.length()-1);
                                memset(lastNode.value, 0xff, lastNode.size);
                            }
                            ForEachItemIn(idx, tlkRows)
                            {
                                CNodeInfo &info = tlkRows.item(idx);
                                builder->processKeyData((char *)info.value, info.pos, info.size);
                            }
                            close(*tlkDesc, tlkCrc, true);
                        }
                        catch (CATCHALL)
                        {
                            abortSoon = true;
                            close(*tlkDesc, tlkCrc, true);
                            removeFiles(*partDesc);
                            throw;
                        }
                    }
                }
                else if (!isLocal && firstNode())