Ejemplo n.º 1
0
void CWriteMasterBase::publish()
{
    if (published) return;
    published = true;
    if (!(diskHelperBase->getFlags() & (TDXtemporary|TDXjobtemp)))
        updateActivityResult(container.queryJob().queryWorkUnit(), diskHelperBase->getFlags(), diskHelperBase->getSequence(), fileName, recordsProcessed);

    IPropertyTree &props = fileDesc->queryProperties();
    props.setPropInt64("@recordCount", recordsProcessed);
    if (0 == (diskHelperBase->getFlags() & TDXtemporary) || container.queryJob().queryUseCheckpoints())
    {
        if (0 != (diskHelperBase->getFlags() & TDWexpires))
            setExpiryTime(props, diskHelperBase->getExpiryDays());
        if (TDWupdate & diskHelperBase->getFlags())
        {
            unsigned eclCRC;
            unsigned __int64 totalCRC;
            diskHelperBase->getUpdateCRCs(eclCRC, totalCRC);
            props.setPropInt("@eclCRC", eclCRC);
            props.setPropInt64("@totalCRC", totalCRC);
        }
    }
    container.queryTempHandler()->registerFile(fileName, container.queryOwner().queryGraphId(), diskHelperBase->getTempUsageCount(), TDXtemporary & diskHelperBase->getFlags(), getDiskOutputKind(diskHelperBase->getFlags()), &clusters);
    if (!dlfn.isExternal())
    {
        bool temporary = 0 != (diskHelperBase->getFlags()&TDXtemporary);
        if (!temporary && (queryJob().querySlaves() < fileDesc->numParts()))
        {
            // create empty parts for a fileDesc being published that is larger than this clusters
            size32_t recordSize = 0;
            IOutputMetaData *diskRowMeta = diskHelperBase->queryDiskRecordSize()->querySerializedDiskMeta();
            if (diskRowMeta->isFixedSize() && (TAKdiskwrite == container.getKind()))
            {
                recordSize = diskRowMeta->getMinRecordSize();
                if (0 != (diskHelperBase->getFlags() & TDXgrouped))
                    recordSize += 1;
            }
            unsigned compMethod = COMPRESS_METHOD_LZW;
            // rowdiff used if recordSize > 0, else fallback to compMethod
            if (getOptBool(THOROPT_COMP_FORCELZW, false))
            {
                recordSize = 0; // by default if fixed length (recordSize set), row diff compression is used. This forces compMethod.
                compMethod = COMPRESS_METHOD_LZW;
            }
            else if (getOptBool(THOROPT_COMP_FORCEFLZ, false))
                compMethod = COMPRESS_METHOD_FASTLZ;
            else if (getOptBool(THOROPT_COMP_FORCELZ4, false))
                compMethod = COMPRESS_METHOD_LZ4;
            bool blockCompressed;
            bool compressed = fileDesc->isCompressed(&blockCompressed);
            for (unsigned clusterIdx=0; clusterIdx<fileDesc->numClusters(); clusterIdx++)
            {
                StringBuffer clusterName;
                fileDesc->getClusterGroupName(clusterIdx, clusterName, &queryNamedGroupStore());
                PROGLOG("Creating blank parts for file '%s', cluster '%s'", fileName.get(), clusterName.str());
                unsigned p=0;
                while (p<fileDesc->numParts())
                {
                    if (p == targetOffset)
                        p += queryJob().querySlaves();
                    IPartDescriptor *partDesc = fileDesc->queryPart(p);
                    CDateTime createTime, modifiedTime;
                    for (unsigned c=0; c<partDesc->numCopies(); c++)
                    {
                        RemoteFilename rfn;
                        partDesc->getFilename(c, rfn);
                        StringBuffer path;
                        rfn.getPath(path);
                        try
                        {
                            ensureDirectoryForFile(path.str());
                            OwnedIFile iFile = createIFile(path.str());
                            Owned<IFileIO> iFileIO;
                            if (compressed)
                                iFileIO.setown(createCompressedFileWriter(iFile, recordSize, false, true, NULL, compMethod));
                            else
                                iFileIO.setown(iFile->open(IFOcreate));
                            dbgassertex(iFileIO.get());
                            iFileIO.clear();
                            // ensure copies have matching datestamps, as they would do normally (backupnode expects it)
                            if (partDesc->numCopies() > 1)
                            {
                                if (0 == c)
                                    iFile->getTime(&createTime, &modifiedTime, NULL);
                                else
                                    iFile->setTime(&createTime, &modifiedTime, NULL);
                            }
                        }
                        catch (IException *e)
                        {
                            if (0 == c)
                                throw;
                            Owned<IThorException> e2 = MakeThorException(e);
                            e->Release();
                            e2->setAction(tea_warning);
                            queryJob().fireException(e2);
                        }
                    }
                    partDesc->queryProperties().setPropInt64("@size", 0);
                    p++;
                }
                clusterIdx++;
            }
        }
        queryThorFileManager().publish(container.queryJob(), fileName, *fileDesc, NULL);
    }
}
Ejemplo n.º 2
0
void copyCompress(const char *from, const char *to, size32_t rowsize, bool fast, bool flzstrm, bool stats)
{
    Owned<IFile> srcfile = createIFile(from);
    Owned<IFileIO> baseio = srcfile->open(IFOread);
    if (!baseio) {
        printf("ERROR: could not open '%s' for read\n",from);
        doexit(3);
    }
    Owned<ICompressedFileIO> cmpio = createCompressedFileReader(baseio);
    Owned<IFileIOStream>  flzstrmsrc = cmpio?NULL:createFastLZStreamRead(baseio);
    bool plaincopy = false;
    IFileIO *srcio = NULL;
    if (cmpio) {
        srcio = cmpio;
        if (rowsize&&(cmpio->recordSize()==rowsize))
            plaincopy = true;
        else if (!rowsize) {
            if (fast&&(cmpio->method()==COMPRESS_METHOD_FASTLZ))
                plaincopy = true;
            else if (!fast&&(cmpio->method()==COMPRESS_METHOD_LZW))
                plaincopy = true;
        }
    }
    else if (flzstrmsrc) {
        if (flzstrm)
            plaincopy = true;
    }
    else
        srcio = baseio; 
    if (plaincopy) {
        cmpio.clear();
        srcio = baseio.get(); 
    }
    Owned<IFile> dstfile = createIFile(to);
    StringBuffer fulldst;
    if (dstfile->isDirectory()==foundYes) {
        dstfile.clear();
        addPathSepChar(fulldst.append(to)).append(pathTail(from));
        to = fulldst.str();
        dstfile.setown(createIFile(to));
    }

    if (dstfile->exists()) {
        printf("ERROR: file '%s' already exists\n",to);
        doexit(4);
    }
    unsigned start;
    unsigned startu;
    if (stats) {
         start = msTick();
         startu = usTick();
    }
    Owned<IFileIO> dstio;
    Owned<IFileIOStream>  flzstrmdst;
    if (plaincopy||flzstrm) {
        dstio.setown(dstfile->open(IFOcreate));
        if (dstio&&!plaincopy)
            flzstrmdst.setown(createFastLZStreamWrite(dstio));
    }
    else 
        dstio.setown(createCompressedFileWriter(dstfile,rowsize,false,true,NULL,fast));

    if (!dstio) {
        printf("ERROR: could not open '%s' for write\n",to);
        doexit(5);
    }
#ifdef __linux__
    // this is not really needed in windows - if it is we will have to
    // test the file extension - .exe, .bat

    struct stat info;
    if (stat(from, &info) == 0)  // cannot fail - exception would have been thrown above
        dstfile->setCreateFlags(info.st_mode&(S_IRUSR|S_IRGRP|S_IROTH|S_IWUSR|S_IWGRP|S_IWOTH|S_IXUSR|S_IXGRP|S_IXOTH));
#endif
    MemoryAttr mb;
    void * buffer = mb.allocate(BUFFERSIZE);

    offset_t offset = 0;
    try
    {
        loop {
            size32_t got = cmpio.get()?cmpio->read(offset, BUFFERSIZE, buffer):srcio->read(offset, BUFFERSIZE, buffer);
            if (got == 0)
                break;
            if (flzstrmdst)
                flzstrmdst->write(got,buffer);
            else
                dstio->write(offset, got, buffer);
            offset += got;
        }
    }
    catch (IException *e)
    {
        // try to delete partial copy
        dstio.clear();
        try {
            dstfile->remove();
        }
        catch (IException *e2) {
            StringBuffer s;
            pexception(s.clear().append("Removing partial copy file: ").append(to).str(),e2);
            e2->Release();
        }
        throw e;
    }
    flzstrmdst.clear();
    dstio.clear();
    if (stats) 
        printStats(offset,start,startu);
    CDateTime createTime, modifiedTime;
    if (srcfile->getTime(&createTime, &modifiedTime, NULL))
        dstfile->setTime(&createTime, &modifiedTime, NULL);
    printf("copied %s to %s%s\n",from,to,plaincopy?"":" compressing");
    { // print details 
        dstio.setown(dstfile->open(IFOread));
        if (dstio) {
            Owned<ICompressedFileIO> cmpio = createCompressedFileReader(dstio);
            Owned<IFileIOStream>  flzstrm = cmpio?NULL:createFastLZStreamRead(dstio);
            if (cmpio||flzstrm) 
                printCompDetails(to,dstio,cmpio,flzstrm);
            else 
                printf("destination %s not compressed\n",to);
        }
        else
            printf("destination %s could not be read\n",to);
    }
}