Beispiel #1
0
 virtual void validateFile(IDistributedFile *file)
 {
     IHThorDiskReadBaseArg *helper = (IHThorDiskReadBaseArg *)queryHelper();
     bool codeGenGrouped = 0 != (TDXgrouped & helper->getFlags());
     bool isGrouped = fileDesc->isGrouped();
     if (isGrouped != codeGenGrouped)
     {
         Owned<IException> e = MakeActivityWarning(&container, TE_GroupMismatch, "DFS and code generated group info. differs: DFS(%s), CodeGen(%s), using DFS info", isGrouped?"grouped":"ungrouped", codeGenGrouped?"grouped":"ungrouped");
         queryJobChannel().fireException(e);
     }
     IOutputMetaData *recordSize = helper->queryDiskRecordSize()->querySerializedDiskMeta();
     if (recordSize->isFixedSize()) // fixed size
     {
         if (0 != fileDesc->queryProperties().getPropInt("@recordSize"))
         {
             size32_t rSz = fileDesc->queryProperties().getPropInt("@recordSize");
             if (isGrouped)
                 rSz--; // eog byte not to be included in this test.
             if (rSz != recordSize->getMinRecordSize())
                 throw MakeThorException(TE_RecordSizeMismatch, "Published record size %d for file %s, does not match coded record size %d", rSz, fileName.get(), recordSize->getMinRecordSize());
         }
         if (!fileDesc->isCompressed() && (TDXcompress & helper->getFlags()))
         {
             size32_t rSz = recordSize->getMinRecordSize();
             if (isGrouped) rSz++;
             if (rSz >= MIN_ROWCOMPRESS_RECSIZE)
             {
                 Owned<IException> e = MakeActivityWarning(&container, TE_CompressionMismatch, "Ignoring compression attribute on file '%s', which is not published as compressed in DFS", fileName.get());
                 queryJobChannel().fireException(e);
             }
         }
     }
     if (0 == (TDRnocrccheck & helper->getFlags()))
         checkFormatCrc(this, file, helper->getFormatCrc(), false);
 }
Beispiel #2
0
    void init()
    {
        CWriteMasterBase::init();

        IHThorDiskWriteArg *helper=(IHThorDiskWriteArg *)queryHelper();
        IOutputMetaData *irecsize = helper->queryDiskRecordSize()->querySerializedDiskMeta();
        IPropertyTree &props = fileDesc->queryProperties();
        if (0 != (helper->getFlags() & TDXgrouped))
            props.setPropBool("@grouped", true);
        if (irecsize->isFixedSize()) // fixed size
        {
            size32_t rSz = irecsize->getMinRecordSize();
            if (0 != (helper->getFlags() & TDXgrouped))
                ++rSz;
            props.setPropInt("@recordSize", rSz);
        }
        props.setPropInt("@formatCrc", helper->getFormatCrc());
    }
void CDiskWriteSlaveActivityBase::open()
{
    if (dlfn.isExternal() && !firstNode())
    {
        input.setown(createDataLinkSmartBuffer(this, inputs.item(0), PROCESS_SMART_BUFFER_SIZE, isSmartBufferSpillNeeded(this), grouped, RCUNBOUND, NULL, false, &container.queryJob().queryIDiskUsage()));
        startInput(input);
        if (!rfsQueryParallel)
        {
            ActPrintLog("Blocked, waiting for previous part to complete write");
            CMessageBuffer msg;
            if (!receiveMsg(msg, queryJobChannel().queryMyRank()-1, mpTag))
                return;
            rowcount_t prevRows;
            msg.read(prevRows);
            msg.read(tempExternalName); // reuse temp filename, last node will rename
            ActPrintLog("Previous write row count = %" RCPF "d", prevRows);
        }
    }
    else
    {
        input.set(inputs.item(0));
        startInput(input);
    }
    processed = THORDATALINK_STARTED;

    bool extend = 0 != (diskHelperBase->getFlags() & TDWextend);
    if (extend)
        ActPrintLog("Extending file %s", fName.get());

    /* Fixed length record size is used when outputting compressed stream to determine run-length compression vs default LZW compression.
     * NB: only for FLAT files, not CSV or XML
     */
    size32_t diskRowMinSz = 0;
    IOutputMetaData *diskRowMeta = diskHelperBase->queryDiskRecordSize()->querySerializedDiskMeta();
    if (diskRowMeta->isFixedSize() && (TAKdiskwrite == container.getKind()))
    {
        diskRowMinSz = diskRowMeta->getMinRecordSize();
        if (grouped)
            diskRowMinSz += 1;
    }

    calcFileCrc = true;

    bool external = dlfn.isExternal();
    bool query = dlfn.isQuery();
    if (query && compress)
        UNIMPLEMENTED;

    unsigned twFlags = external ? TW_External : 0;
    if (query || (external && !firstNode()))
        twFlags |= TW_Direct;
    if (!external || (!query && lastNode()))
        twFlags |= TW_RenameToPrimary;
    if (extend||(external&&!query))
        twFlags |= TW_Extend;

    Owned<IFileIO> iFileIO = createMultipleWrite(this, *partDesc, diskRowMinSz, twFlags, compress, ecomp, this, &abortSoon, (external&&!query) ? &tempExternalName : NULL);

    if (compress)
    {
        ActPrintLog("Performing row compression on output file: %s", fName.get());
        // NB: block compressed output has implicit crc of 0, no need to calculate in row  writer.
        calcFileCrc = false;
    }
    Owned<IFileIOStream> stream;
    if (wantRaw())
    {
        outraw.setown(createBufferedIOStream(iFileIO));
        stream.set(outraw);
    }
    else
    {
        stream.setown(createIOStream(iFileIO));
        unsigned rwFlags = 0;
        if (grouped)
            rwFlags |= rw_grouped;
        if (calcFileCrc)
            rwFlags |= rw_crc;
        out.setown(createRowWriter(stream, ::queryRowInterfaces(input), rwFlags));
    }
    if (extend || (external && !query))
        stream->seek(0,IFSend);
    ActPrintLog("Created output stream for %s", fName.get());
}
Beispiel #4
0
void CWriteMasterBase::publish()
{
    if (published) return;
    published = true;
    if (!(diskHelperBase->getFlags() & (TDXtemporary|TDXjobtemp)))
        updateActivityResult(container.queryJob().queryWorkUnit(), diskHelperBase->getFlags(), diskHelperBase->getSequence(), fileName, recordsProcessed);

    IPropertyTree &props = fileDesc->queryProperties();
    props.setPropInt64("@recordCount", recordsProcessed);
    if (0 == (diskHelperBase->getFlags() & TDXtemporary) || container.queryJob().queryUseCheckpoints())
    {
        if (0 != (diskHelperBase->getFlags() & TDWexpires))
            setExpiryTime(props, diskHelperBase->getExpiryDays());
        if (TDWupdate & diskHelperBase->getFlags())
        {
            unsigned eclCRC;
            unsigned __int64 totalCRC;
            diskHelperBase->getUpdateCRCs(eclCRC, totalCRC);
            props.setPropInt("@eclCRC", eclCRC);
            props.setPropInt64("@totalCRC", totalCRC);
        }
    }
    container.queryTempHandler()->registerFile(fileName, container.queryOwner().queryGraphId(), diskHelperBase->getTempUsageCount(), TDXtemporary & diskHelperBase->getFlags(), getDiskOutputKind(diskHelperBase->getFlags()), &clusters);
    if (!dlfn.isExternal())
    {
        bool temporary = 0 != (diskHelperBase->getFlags()&TDXtemporary);
        if (!temporary && (queryJob().querySlaves() < fileDesc->numParts()))
        {
            // create empty parts for a fileDesc being published that is larger than this clusters
            size32_t recordSize = 0;
            IOutputMetaData *diskRowMeta = diskHelperBase->queryDiskRecordSize()->querySerializedDiskMeta();
            if (diskRowMeta->isFixedSize() && (TAKdiskwrite == container.getKind()))
            {
                recordSize = diskRowMeta->getMinRecordSize();
                if (0 != (diskHelperBase->getFlags() & TDXgrouped))
                    recordSize += 1;
            }
            unsigned compMethod = COMPRESS_METHOD_LZW;
            // rowdiff used if recordSize > 0, else fallback to compMethod
            if (getOptBool(THOROPT_COMP_FORCELZW, false))
            {
                recordSize = 0; // by default if fixed length (recordSize set), row diff compression is used. This forces compMethod.
                compMethod = COMPRESS_METHOD_LZW;
            }
            else if (getOptBool(THOROPT_COMP_FORCEFLZ, false))
                compMethod = COMPRESS_METHOD_FASTLZ;
            else if (getOptBool(THOROPT_COMP_FORCELZ4, false))
                compMethod = COMPRESS_METHOD_LZ4;
            bool blockCompressed;
            bool compressed = fileDesc->isCompressed(&blockCompressed);
            for (unsigned clusterIdx=0; clusterIdx<fileDesc->numClusters(); clusterIdx++)
            {
                StringBuffer clusterName;
                fileDesc->getClusterGroupName(clusterIdx, clusterName, &queryNamedGroupStore());
                PROGLOG("Creating blank parts for file '%s', cluster '%s'", fileName.get(), clusterName.str());
                unsigned p=0;
                while (p<fileDesc->numParts())
                {
                    if (p == targetOffset)
                        p += queryJob().querySlaves();
                    IPartDescriptor *partDesc = fileDesc->queryPart(p);
                    CDateTime createTime, modifiedTime;
                    for (unsigned c=0; c<partDesc->numCopies(); c++)
                    {
                        RemoteFilename rfn;
                        partDesc->getFilename(c, rfn);
                        StringBuffer path;
                        rfn.getPath(path);
                        try
                        {
                            ensureDirectoryForFile(path.str());
                            OwnedIFile iFile = createIFile(path.str());
                            Owned<IFileIO> iFileIO;
                            if (compressed)
                                iFileIO.setown(createCompressedFileWriter(iFile, recordSize, false, true, NULL, compMethod));
                            else
                                iFileIO.setown(iFile->open(IFOcreate));
                            dbgassertex(iFileIO.get());
                            iFileIO.clear();
                            // ensure copies have matching datestamps, as they would do normally (backupnode expects it)
                            if (partDesc->numCopies() > 1)
                            {
                                if (0 == c)
                                    iFile->getTime(&createTime, &modifiedTime, NULL);
                                else
                                    iFile->setTime(&createTime, &modifiedTime, NULL);
                            }
                        }
                        catch (IException *e)
                        {
                            if (0 == c)
                                throw;
                            Owned<IThorException> e2 = MakeThorException(e);
                            e->Release();
                            e2->setAction(tea_warning);
                            queryJob().fireException(e2);
                        }
                    }
                    partDesc->queryProperties().setPropInt64("@size", 0);
                    p++;
                }
                clusterIdx++;
            }
        }
        queryThorFileManager().publish(container.queryJob(), fileName, *fileDesc, NULL);
    }
}