void CWriteMasterBase::publish() { if (published) return; published = true; if (!(diskHelperBase->getFlags() & (TDXtemporary|TDXjobtemp))) updateActivityResult(container.queryJob().queryWorkUnit(), diskHelperBase->getFlags(), diskHelperBase->getSequence(), fileName, recordsProcessed); IPropertyTree &props = fileDesc->queryProperties(); props.setPropInt64("@recordCount", recordsProcessed); if (0 == (diskHelperBase->getFlags() & TDXtemporary) || container.queryJob().queryUseCheckpoints()) { if (0 != (diskHelperBase->getFlags() & TDWexpires)) setExpiryTime(props, diskHelperBase->getExpiryDays()); if (TDWupdate & diskHelperBase->getFlags()) { unsigned eclCRC; unsigned __int64 totalCRC; diskHelperBase->getUpdateCRCs(eclCRC, totalCRC); props.setPropInt("@eclCRC", eclCRC); props.setPropInt64("@totalCRC", totalCRC); } } container.queryTempHandler()->registerFile(fileName, container.queryOwner().queryGraphId(), diskHelperBase->getTempUsageCount(), TDXtemporary & diskHelperBase->getFlags(), getDiskOutputKind(diskHelperBase->getFlags()), &clusters); if (!dlfn.isExternal()) { bool temporary = 0 != (diskHelperBase->getFlags()&TDXtemporary); if (!temporary && (queryJob().querySlaves() < fileDesc->numParts())) { // create empty parts for a fileDesc being published that is larger than this clusters size32_t recordSize = 0; IOutputMetaData *diskRowMeta = diskHelperBase->queryDiskRecordSize()->querySerializedDiskMeta(); if (diskRowMeta->isFixedSize() && (TAKdiskwrite == container.getKind())) { recordSize = diskRowMeta->getMinRecordSize(); if (0 != (diskHelperBase->getFlags() & TDXgrouped)) recordSize += 1; } unsigned compMethod = COMPRESS_METHOD_LZW; // rowdiff used if recordSize > 0, else fallback to compMethod if (getOptBool(THOROPT_COMP_FORCELZW, false)) { recordSize = 0; // by default if fixed length (recordSize set), row diff compression is used. This forces compMethod. compMethod = COMPRESS_METHOD_LZW; } else if (getOptBool(THOROPT_COMP_FORCEFLZ, false)) compMethod = COMPRESS_METHOD_FASTLZ; else if (getOptBool(THOROPT_COMP_FORCELZ4, false)) compMethod = COMPRESS_METHOD_LZ4; bool blockCompressed; bool compressed = fileDesc->isCompressed(&blockCompressed); for (unsigned clusterIdx=0; clusterIdx<fileDesc->numClusters(); clusterIdx++) { StringBuffer clusterName; fileDesc->getClusterGroupName(clusterIdx, clusterName, &queryNamedGroupStore()); PROGLOG("Creating blank parts for file '%s', cluster '%s'", fileName.get(), clusterName.str()); unsigned p=0; while (p<fileDesc->numParts()) { if (p == targetOffset) p += queryJob().querySlaves(); IPartDescriptor *partDesc = fileDesc->queryPart(p); CDateTime createTime, modifiedTime; for (unsigned c=0; c<partDesc->numCopies(); c++) { RemoteFilename rfn; partDesc->getFilename(c, rfn); StringBuffer path; rfn.getPath(path); try { ensureDirectoryForFile(path.str()); OwnedIFile iFile = createIFile(path.str()); Owned<IFileIO> iFileIO; if (compressed) iFileIO.setown(createCompressedFileWriter(iFile, recordSize, false, true, NULL, compMethod)); else iFileIO.setown(iFile->open(IFOcreate)); dbgassertex(iFileIO.get()); iFileIO.clear(); // ensure copies have matching datestamps, as they would do normally (backupnode expects it) if (partDesc->numCopies() > 1) { if (0 == c) iFile->getTime(&createTime, &modifiedTime, NULL); else iFile->setTime(&createTime, &modifiedTime, NULL); } } catch (IException *e) { if (0 == c) throw; Owned<IThorException> e2 = MakeThorException(e); e->Release(); e2->setAction(tea_warning); queryJob().fireException(e2); } } partDesc->queryProperties().setPropInt64("@size", 0); p++; } clusterIdx++; } } queryThorFileManager().publish(container.queryJob(), fileName, *fileDesc, NULL); } }
void copyCompress(const char *from, const char *to, size32_t rowsize, bool fast, bool flzstrm, bool stats) { Owned<IFile> srcfile = createIFile(from); Owned<IFileIO> baseio = srcfile->open(IFOread); if (!baseio) { printf("ERROR: could not open '%s' for read\n",from); doexit(3); } Owned<ICompressedFileIO> cmpio = createCompressedFileReader(baseio); Owned<IFileIOStream> flzstrmsrc = cmpio?NULL:createFastLZStreamRead(baseio); bool plaincopy = false; IFileIO *srcio = NULL; if (cmpio) { srcio = cmpio; if (rowsize&&(cmpio->recordSize()==rowsize)) plaincopy = true; else if (!rowsize) { if (fast&&(cmpio->method()==COMPRESS_METHOD_FASTLZ)) plaincopy = true; else if (!fast&&(cmpio->method()==COMPRESS_METHOD_LZW)) plaincopy = true; } } else if (flzstrmsrc) { if (flzstrm) plaincopy = true; } else srcio = baseio; if (plaincopy) { cmpio.clear(); srcio = baseio.get(); } Owned<IFile> dstfile = createIFile(to); StringBuffer fulldst; if (dstfile->isDirectory()==foundYes) { dstfile.clear(); addPathSepChar(fulldst.append(to)).append(pathTail(from)); to = fulldst.str(); dstfile.setown(createIFile(to)); } if (dstfile->exists()) { printf("ERROR: file '%s' already exists\n",to); doexit(4); } unsigned start; unsigned startu; if (stats) { start = msTick(); startu = usTick(); } Owned<IFileIO> dstio; Owned<IFileIOStream> flzstrmdst; if (plaincopy||flzstrm) { dstio.setown(dstfile->open(IFOcreate)); if (dstio&&!plaincopy) flzstrmdst.setown(createFastLZStreamWrite(dstio)); } else dstio.setown(createCompressedFileWriter(dstfile,rowsize,false,true,NULL,fast)); if (!dstio) { printf("ERROR: could not open '%s' for write\n",to); doexit(5); } #ifdef __linux__ // this is not really needed in windows - if it is we will have to // test the file extension - .exe, .bat struct stat info; if (stat(from, &info) == 0) // cannot fail - exception would have been thrown above dstfile->setCreateFlags(info.st_mode&(S_IRUSR|S_IRGRP|S_IROTH|S_IWUSR|S_IWGRP|S_IWOTH|S_IXUSR|S_IXGRP|S_IXOTH)); #endif MemoryAttr mb; void * buffer = mb.allocate(BUFFERSIZE); offset_t offset = 0; try { loop { size32_t got = cmpio.get()?cmpio->read(offset, BUFFERSIZE, buffer):srcio->read(offset, BUFFERSIZE, buffer); if (got == 0) break; if (flzstrmdst) flzstrmdst->write(got,buffer); else dstio->write(offset, got, buffer); offset += got; } } catch (IException *e) { // try to delete partial copy dstio.clear(); try { dstfile->remove(); } catch (IException *e2) { StringBuffer s; pexception(s.clear().append("Removing partial copy file: ").append(to).str(),e2); e2->Release(); } throw e; } flzstrmdst.clear(); dstio.clear(); if (stats) printStats(offset,start,startu); CDateTime createTime, modifiedTime; if (srcfile->getTime(&createTime, &modifiedTime, NULL)) dstfile->setTime(&createTime, &modifiedTime, NULL); printf("copied %s to %s%s\n",from,to,plaincopy?"":" compressing"); { // print details dstio.setown(dstfile->open(IFOread)); if (dstio) { Owned<ICompressedFileIO> cmpio = createCompressedFileReader(dstio); Owned<IFileIOStream> flzstrm = cmpio?NULL:createFastLZStreamRead(dstio); if (cmpio||flzstrm) printCompDetails(to,dstio,cmpio,flzstrm); else printf("destination %s not compressed\n",to); } else printf("destination %s could not be read\n",to); } }