Example #1
 virtual void validateFile(IDistributedFile *file)
 {
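     // Cross-check the published DFS metadata for the file against what the
     // generated helper expects: grouping, fixed record size, compression
     // hints, and format CRC.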
     IHThorDiskReadBaseArg *helper = (IHThorDiskReadBaseArg *)queryHelper();
     bool codeGenGrouped = 0 != (TDXgrouped & helper->getFlags());
     bool isGrouped = fileDesc->isGrouped();
     if (isGrouped != codeGenGrouped)
     {
         Owned<IException> e = MakeActivityWarning(&container, TE_GroupMismatch, "DFS and code-generated group info differs: DFS(%s), CodeGen(%s), using DFS info", isGrouped?"grouped":"ungrouped", codeGenGrouped?"grouped":"ungrouped");
         queryJobChannel().fireException(e);
     }
     IOutputMetaData *recordSize = helper->queryDiskRecordSize()->querySerializedDiskMeta();
     if (recordSize->isFixedSize()) // fixed size
     {
         size32_t rSz = fileDesc->queryProperties().getPropInt("@recordSize");
         if (0 != rSz)
         {
             if (isGrouped)
                 rSz--; // the eog (end-of-group) byte is not included in this comparison
             if (rSz != recordSize->getMinRecordSize())
                 throw MakeThorException(TE_RecordSizeMismatch, "Published record size %d for file %s does not match coded record size %d", rSz, fileName.get(), recordSize->getMinRecordSize());
         }
         if (!fileDesc->isCompressed() && (TDXcompress & helper->getFlags()))
         {
             size32_t rSz = recordSize->getMinRecordSize();
             if (isGrouped) rSz++;
             if (rSz >= MIN_ROWCOMPRESS_RECSIZE)
             {
                 Owned<IException> e = MakeActivityWarning(&container, TE_CompressionMismatch, "Ignoring compression attribute on file '%s', which is not published as compressed in DFS", fileName.get());
                 queryJobChannel().fireException(e);
             }
         }
     }
     if (0 == (TDRnocrccheck & helper->getFlags()))
         checkFormatCrc(this, file, helper->getFormatCrc(), false);
 }
Example #2
    virtual void init(MemoryBuffer &data, MemoryBuffer &slaveData) override
    {
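        // Deserialize the initialization data sent by the master (MP tags, the
        // target part descriptor, single-part/refactor/TLK settings) and work
        // out the maximum record size for the key build.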
        isLocal = 0 != (TIWlocal & helper->getFlags());

        mpTag = container.queryJobChannel().deserializeMPTag(data);
        mpTag2 = container.queryJobChannel().deserializeMPTag(data);
        data.read(active);
        if (active)
        {
            data.read(logicalFilename);
            partDesc.setown(deserializePartFileDescriptor(data));
        }

        data.read(singlePartKey);
        data.read(refactor);
        if (singlePartKey)
            buildTlk = false;
        else
        {
            data.read(buildTlk);
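            // Only the first node deals with the top-level key (TLK): either a
            // new TLK part to build, or an existing TLK to locate for a global
            // index.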
            if (firstNode())
            {
                if (buildTlk)
                    tlkDesc.setown(deserializePartFileDescriptor(data));
                else if (!isLocal) // global index, so an existing TLK must be located
                {
                    tlkDesc.setown(deserializePartFileDescriptor(data));
                    unsigned c;
                    data.read(c);
                    while (c--)
                    {
                        RemoteFilename rf;
                        rf.deserialize(data);
                        if (!existingTlkIFile)
                        {
                            Owned<IFile> iFile = createIFile(rf);
                            if (iFile->exists())
                                existingTlkIFile.set(iFile);
                        }
                    }
                    if (!existingTlkIFile)
                        throw MakeActivityException(this, TE_FileNotFound, "Top-level key part does not exist for key");
                }
            }
        }

        IOutputMetaData * diskSize = helper->queryDiskRecordSize();
        assertex(!(diskSize->getMetaFlags() & MDFneedserializedisk));
        if (diskSize->isVariableSize())
        {
            if (TIWmaxlength & helper->getFlags())
                maxDiskRecordSize = helper->getMaxKeySize();
            else
                maxDiskRecordSize = KEYBUILD_MAXLENGTH; //Current default behaviour, could be improved in the future
        }
        else
            maxDiskRecordSize = diskSize->getFixedSize();
        reportOverflow = false;
    }
Example #3
    void init()
    {
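        // Publish grouping, fixed record size (including the end-of-group byte
        // for grouped files) and the generated format CRC to the file's DFS
        // properties.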
        CWriteMasterBase::init();

        IHThorDiskWriteArg *helper=(IHThorDiskWriteArg *)queryHelper();
        IOutputMetaData *irecsize = helper->queryDiskRecordSize()->querySerializedDiskMeta();
        IPropertyTree &props = fileDesc->queryProperties();
        if (0 != (helper->getFlags() & TDXgrouped))
            props.setPropBool("@grouped", true);
        if (irecsize->isFixedSize()) // fixed size
        {
            size32_t rSz = irecsize->getMinRecordSize();
            if (0 != (helper->getFlags() & TDXgrouped))
                ++rSz;
            props.setPropInt("@recordSize", rSz);
        }
        props.setPropInt("@formatCrc", helper->getFormatCrc());
    }
Example #4
 void CouchbaseEmbedFunctionContext::bindDatasetParam(const char *name, IOutputMetaData & metaVal, IRowStream * val)
 {
     // We only support a single dataset parameter...
     // MORE - look into batch?
     if (m_oInputStream)
     {
         fail("At most one dataset parameter supported");
     }
     m_oInputStream.setown(new CouchbaseDatasetBinder(logctx, LINK(val), metaVal.queryTypeInfo(), m_pQcmd, m_nextParam));
     m_nextParam += m_oInputStream->numFields();
 }
Example #5
    virtual void process()
    {
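        // Merge the per-slave distribution tables into a single result row,
        // wrap the gathered output in <XML> tags, and return it via the helper.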
        CMasterActivity::process();

        IHThorDistributionArg * helper = (IHThorDistributionArg *)queryHelper();
        IOutputMetaData *rcSz = helper->queryInternalRecordSize();

        unsigned nslaves = container.queryJob().querySlaves();

        IDistributionTable **result = (IDistributionTable **)createThorRow(rcSz->getMinRecordSize()); // not a real row
        helper->clearAggregate(result);

        while (nslaves--)
        {
            rank_t sender;
            CMessageBuffer msg;
            if (!receiveMsg(msg, RANK_ALL, mpTag, &sender))
                return;
#if THOR_TRACE_LEVEL >= 5
            ActPrintLog("Received distribution result from node %d", (unsigned)sender);
#endif
            if (msg.length())
                helper->merge(result, msg);
        }

        StringBuffer tmp;
        tmp.append("<XML>");
        helper->gatherResult(result, tmp);
        tmp.append("</XML>");

#if THOR_TRACE_LEVEL >= 5
        ActPrintLog("Distribution result: %s", tmp.str());
#endif

        helper->sendResult(tmp.length(), tmp.str());

        destroyThorRow(result);
    }
Example #6
    virtual void init()
    {
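        // Resolve and scope the target index filename, validate min/max record
        // sizes against the key-build limit, then decide single-part, TLK and
        // refactoring modes for the build.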
        CMasterActivity::init();
        OwnedRoxieString helperFileName = helper->getFileName();
        StringBuffer expandedFileName;
        queryThorFileManager().addScope(container.queryJob(), helperFileName, expandedFileName, false);
        fileName.set(expandedFileName);
        dlfn.set(fileName);
        isLocal = 0 != (TIWlocal & helper->getFlags());
        IOutputMetaData * diskSize = helper->queryDiskRecordSize();
        unsigned minSize = diskSize->getMinRecordSize();
        if (minSize > KEYBUILD_MAXLENGTH)
            throw MakeActivityException(this, 0, "Index minimum record length (%d) exceeds %d internal limit", minSize, KEYBUILD_MAXLENGTH);
        unsigned maxSize;
        if (diskSize->isVariableSize())
        {
            if (TIWmaxlength & helper->getFlags())
                maxSize = helper->getMaxKeySize();
            else
                maxSize = KEYBUILD_MAXLENGTH; //Current default behaviour, could be improved in the future
        }
        else
            maxSize = diskSize->getFixedSize();
        if (maxSize > KEYBUILD_MAXLENGTH)
            throw MakeActivityException(this, 0, "Index maximum record length (%d) exceeds %d internal limit. Maximum size = %d, try adjusting index MAXLENGTH", maxSize, KEYBUILD_MAXLENGTH, minSize);

        singlePartKey = 0 != (helper->getFlags() & TIWsmall) || dlfn.isExternal();
        clusters.kill();
        unsigned idx=0;
        while (true)
        {
            OwnedRoxieString cluster(helper->getCluster(idx));
            if(!cluster)
                break;
            clusters.append(cluster);
            idx++;
        }
        IArrayOf<IGroup> groups;
        if (singlePartKey)
        {
            isLocal = true;
            buildTlk = false;
        }
        else if (!isLocal || globals->getPropBool("@buildLocalTlks", true))
            buildTlk = true;

        fillClusterArray(container.queryJob(), fileName, clusters, groups);
        unsigned restrictedWidth = 0;
        if (TIWhaswidth & helper->getFlags())
        {
            restrictedWidth = helper->getWidth();
            if (restrictedWidth > container.queryJob().querySlaves())
                throw MakeActivityException(this, 0, "Unsupported, can't refactor to width(%d) larger than host cluster(%d)", restrictedWidth, container.queryJob().querySlaves());
            else if (restrictedWidth < container.queryJob().querySlaves())
            {
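                // Refactoring to fewer parts: every target cluster group must
                // have the same width, and that width must evenly divide the
                // Thor cluster width.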
                if (!isLocal)
                    throw MakeActivityException(this, 0, "Unsupported, refactoring to few parts only supported for local indexes.");
                assertex(!singlePartKey);
                unsigned gwidth = groups.item(0).ordinality();
                if (0 != container.queryJob().querySlaves() % gwidth)
                    throw MakeActivityException(this, 0, "Unsupported, refactored target size (%d) must be factor of thor cluster width (%d)", groups.item(0).ordinality(), container.queryJob().querySlaves());
                if (0 == restrictedWidth)
                    restrictedWidth = gwidth;
                ForEachItemIn(g, groups)
                {
                    IGroup &group = groups.item(g);
                    if (gwidth != groups.item(g).ordinality())
                        throw MakeActivityException(this, 0, "Unsupported, cannot output multiple refactored widths, targeting cluster '%s' and '%s'", clusters.item(0), clusters.item(g));
                    if (gwidth != restrictedWidth)
                        groups.replace(*group.subset((unsigned)0, restrictedWidth), g);
                }
                refactor = true;
            }
        }
        // ... remainder of init() elided ...
    }
Example #7
 void CouchbaseEmbedFunctionContext::bindRowParam(const char *name, IOutputMetaData & metaVal, byte *val)
 {
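     // Bind a single row parameter: walk its fields with the record binder and
     // advance the parameter cursor past them.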
     CouchbaseRecordBinder binder(logctx, metaVal.queryTypeInfo(), m_pQcmd, m_nextParam);
     binder.processRow(val);
     m_nextParam += binder.numFields();
 }
Example #8
void CDiskWriteSlaveActivityBase::open()
{
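    // Set up the output stream: external writes are serialized across nodes
    // (each non-first node waits for the previous part to finish), and the
    // writer flags select direct, extending or rename-to-primary behaviour.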
    if (dlfn.isExternal() && !firstNode())
    {
        input.setown(createDataLinkSmartBuffer(this, inputs.item(0), PROCESS_SMART_BUFFER_SIZE, isSmartBufferSpillNeeded(this), grouped, RCUNBOUND, NULL, false, &container.queryJob().queryIDiskUsage()));
        startInput(input);
        if (!rfsQueryParallel)
        {
            ActPrintLog("Blocked, waiting for previous part to complete write");
            CMessageBuffer msg;
            if (!receiveMsg(msg, queryJobChannel().queryMyRank()-1, mpTag))
                return;
            rowcount_t prevRows;
            msg.read(prevRows);
            msg.read(tempExternalName); // reuse temp filename, last node will rename
            ActPrintLog("Previous write row count = %" RCPF "d", prevRows);
        }
    }
    else
    {
        input.set(inputs.item(0));
        startInput(input);
    }
    processed = THORDATALINK_STARTED;

    bool extend = 0 != (diskHelperBase->getFlags() & TDWextend);
    if (extend)
        ActPrintLog("Extending file %s", fName.get());

    /* Fixed length record size is used when outputting compressed stream to determine run-length compression vs default LZW compression.
     * NB: only for FLAT files, not CSV or XML
     */
    size32_t diskRowMinSz = 0;
    IOutputMetaData *diskRowMeta = diskHelperBase->queryDiskRecordSize()->querySerializedDiskMeta();
    if (diskRowMeta->isFixedSize() && (TAKdiskwrite == container.getKind()))
    {
        diskRowMinSz = diskRowMeta->getMinRecordSize();
        if (grouped)
            diskRowMinSz += 1;
    }

    calcFileCrc = true;

    bool external = dlfn.isExternal();
    bool query = dlfn.isQuery();
    if (query && compress)
        UNIMPLEMENTED;

    unsigned twFlags = external ? TW_External : 0;
    if (query || (external && !firstNode()))
        twFlags |= TW_Direct;
    if (!external || (!query && lastNode()))
        twFlags |= TW_RenameToPrimary;
    if (extend || (external && !query))
        twFlags |= TW_Extend;

    Owned<IFileIO> iFileIO = createMultipleWrite(this, *partDesc, diskRowMinSz, twFlags, compress, ecomp, this, &abortSoon, (external && !query) ? &tempExternalName : NULL);

    if (compress)
    {
        ActPrintLog("Performing row compression on output file: %s", fName.get());
        // NB: block compressed output has an implicit crc of 0, so there is no need to calculate it in the row writer.
        calcFileCrc = false;
    }
    Owned<IFileIOStream> stream;
    if (wantRaw())
    {
        outraw.setown(createBufferedIOStream(iFileIO));
        stream.set(outraw);
    }
    else
    {
        stream.setown(createIOStream(iFileIO));
        unsigned rwFlags = 0;
        if (grouped)
            rwFlags |= rw_grouped;
        if (calcFileCrc)
            rwFlags |= rw_crc;
        out.setown(createRowWriter(stream, ::queryRowInterfaces(input), rwFlags));
    }
    if (extend || (external && !query))
        stream->seek(0, IFSend);
    ActPrintLog("Created output stream for %s", fName.get());
}
Example #9
void CWriteMasterBase::publish()
{
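    // Record the result in the workunit, stamp record count/expiry/CRC
    // properties, create any missing blank parts for wider target clusters,
    // and finally publish the file to DFS.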
    if (published) return;
    published = true;
    if (!(diskHelperBase->getFlags() & (TDXtemporary|TDXjobtemp)))
        updateActivityResult(container.queryJob().queryWorkUnit(), diskHelperBase->getFlags(), diskHelperBase->getSequence(), fileName, recordsProcessed);

    IPropertyTree &props = fileDesc->queryProperties();
    props.setPropInt64("@recordCount", recordsProcessed);
    if (0 == (diskHelperBase->getFlags() & TDXtemporary) || container.queryJob().queryUseCheckpoints())
    {
        if (0 != (diskHelperBase->getFlags() & TDWexpires))
            setExpiryTime(props, diskHelperBase->getExpiryDays());
        if (TDWupdate & diskHelperBase->getFlags())
        {
            unsigned eclCRC;
            unsigned __int64 totalCRC;
            diskHelperBase->getUpdateCRCs(eclCRC, totalCRC);
            props.setPropInt("@eclCRC", eclCRC);
            props.setPropInt64("@totalCRC", totalCRC);
        }
    }
    container.queryTempHandler()->registerFile(fileName, container.queryOwner().queryGraphId(), diskHelperBase->getTempUsageCount(), TDXtemporary & diskHelperBase->getFlags(), getDiskOutputKind(diskHelperBase->getFlags()), &clusters);
    if (!dlfn.isExternal())
    {
        bool temporary = 0 != (diskHelperBase->getFlags()&TDXtemporary);
        if (!temporary && (queryJob().querySlaves() < fileDesc->numParts()))
        {
            // create empty parts for a fileDesc being published that is larger than this cluster
            size32_t recordSize = 0;
            IOutputMetaData *diskRowMeta = diskHelperBase->queryDiskRecordSize()->querySerializedDiskMeta();
            if (diskRowMeta->isFixedSize() && (TAKdiskwrite == container.getKind()))
            {
                recordSize = diskRowMeta->getMinRecordSize();
                if (0 != (diskHelperBase->getFlags() & TDXgrouped))
                    recordSize += 1;
            }
            unsigned compMethod = COMPRESS_METHOD_LZW;
            // row-diff compression is used when recordSize > 0; otherwise fall back to compMethod
            if (getOptBool(THOROPT_COMP_FORCELZW, false))
            {
                recordSize = 0; // by default if fixed length (recordSize set), row diff compression is used. This forces compMethod.
                compMethod = COMPRESS_METHOD_LZW;
            }
            else if (getOptBool(THOROPT_COMP_FORCEFLZ, false))
                compMethod = COMPRESS_METHOD_FASTLZ;
            else if (getOptBool(THOROPT_COMP_FORCELZ4, false))
                compMethod = COMPRESS_METHOD_LZ4;
            bool blockCompressed;
            bool compressed = fileDesc->isCompressed(&blockCompressed);
            for (unsigned clusterIdx=0; clusterIdx<fileDesc->numClusters(); clusterIdx++)
            {
                StringBuffer clusterName;
                fileDesc->getClusterGroupName(clusterIdx, clusterName, &queryNamedGroupStore());
                PROGLOG("Creating blank parts for file '%s', cluster '%s'", fileName.get(), clusterName.str());
                unsigned p=0;
                while (p<fileDesc->numParts())
                {
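                    // Skip the run of parts this job's slaves actually wrote
                    // (starting at targetOffset); create every other part empty.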
                    if (p == targetOffset)
                        p += queryJob().querySlaves();
                    IPartDescriptor *partDesc = fileDesc->queryPart(p);
                    CDateTime createTime, modifiedTime;
                    for (unsigned c=0; c<partDesc->numCopies(); c++)
                    {
                        RemoteFilename rfn;
                        partDesc->getFilename(c, rfn);
                        StringBuffer path;
                        rfn.getPath(path);
                        try
                        {
                            ensureDirectoryForFile(path.str());
                            OwnedIFile iFile = createIFile(path.str());
                            Owned<IFileIO> iFileIO;
                            if (compressed)
                                iFileIO.setown(createCompressedFileWriter(iFile, recordSize, false, true, NULL, compMethod));
                            else
                                iFileIO.setown(iFile->open(IFOcreate));
                            dbgassertex(iFileIO.get());
                            iFileIO.clear();
                            // ensure copies have matching datestamps, as they would do normally (backupnode expects it)
                            if (partDesc->numCopies() > 1)
                            {
                                if (0 == c)
                                    iFile->getTime(&createTime, &modifiedTime, NULL);
                                else
                                    iFile->setTime(&createTime, &modifiedTime, NULL);
                            }
                        }
                        catch (IException *e)
                        {
                            if (0 == c)
                                throw;
                            Owned<IThorException> e2 = MakeThorException(e);
                            e->Release();
                            e2->setAction(tea_warning);
                            queryJob().fireException(e2);
                        }
                    }
                    partDesc->queryProperties().setPropInt64("@size", 0);
                    p++;
                }
            }
        }
        queryThorFileManager().publish(container.queryJob(), fileName, *fileDesc, NULL);
    }
}