virtual void validateFile(IDistributedFile *file)
{
    IHThorDiskReadBaseArg *helper = (IHThorDiskReadBaseArg *)queryHelper();
    bool codeGenGrouped = 0 != (TDXgrouped & helper->getFlags());
    bool isGrouped = fileDesc->isGrouped();
    if (isGrouped != codeGenGrouped)
    {
        Owned<IException> e = MakeActivityWarning(&container, TE_GroupMismatch, "DFS and code generated group info. differs: DFS(%s), CodeGen(%s), using DFS info", isGrouped?"grouped":"ungrouped", codeGenGrouped?"grouped":"ungrouped");
        queryJobChannel().fireException(e);
    }
    IOutputMetaData *recordSize = helper->queryDiskRecordSize()->querySerializedDiskMeta();
    if (recordSize->isFixedSize()) // fixed size
    {
        if (0 != fileDesc->queryProperties().getPropInt("@recordSize"))
        {
            size32_t rSz = fileDesc->queryProperties().getPropInt("@recordSize");
            if (isGrouped)
                rSz--; // eog byte not to be included in this test.
            if (rSz != recordSize->getMinRecordSize())
                throw MakeThorException(TE_RecordSizeMismatch, "Published record size %d for file %s, does not match coded record size %d", rSz, fileName.get(), recordSize->getMinRecordSize());
        }
        if (!fileDesc->isCompressed() && (TDXcompress & helper->getFlags()))
        {
            size32_t rSz = recordSize->getMinRecordSize();
            if (isGrouped)
                rSz++;
            if (rSz >= MIN_ROWCOMPRESS_RECSIZE)
            {
                Owned<IException> e = MakeActivityWarning(&container, TE_CompressionMismatch, "Ignoring compression attribute on file '%s', which is not published as compressed in DFS", fileName.get());
                queryJobChannel().fireException(e);
            }
        }
    }
    if (0 == (TDRnocrccheck & helper->getFlags()))
        checkFormatCrc(this, file, helper->getFormatCrc(), false);
}
void init()
{
    CWriteMasterBase::init();
    IHThorDiskWriteArg *helper = (IHThorDiskWriteArg *)queryHelper();
    IOutputMetaData *irecsize = helper->queryDiskRecordSize()->querySerializedDiskMeta();
    IPropertyTree &props = fileDesc->queryProperties();
    if (0 != (helper->getFlags() & TDXgrouped))
        props.setPropBool("@grouped", true);
    if (irecsize->isFixedSize()) // fixed size
    {
        size32_t rSz = irecsize->getMinRecordSize();
        if (0 != (helper->getFlags() & TDXgrouped))
            ++rSz;
        props.setPropInt("@recordSize", rSz);
    }
    props.setPropInt("@formatCrc", helper->getFormatCrc());
}
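// The two functions above are the read- and write-side halves of the same contract:
// for grouped flat files the published "@recordSize" includes one end-of-group (eog)
// marker byte per row, so the writer (init) adds 1 before publishing and the reader
// (validateFile) subtracts 1 before comparing against the coded record size.
// A minimal sketch of that relationship follows; the helper names are hypothetical
// illustrations, not part of the activity code (size32_t is the usual jlib typedef):
static inline size32_t toPublishedRecSize(size32_t codedRecSize, bool grouped)
{
    return grouped ? codedRecSize + 1 : codedRecSize; // writer accounts for the eog byte
}
static inline size32_t toCodedRecSize(size32_t publishedRecSize, bool grouped)
{
    return grouped ? publishedRecSize - 1 : publishedRecSize; // reader strips it again
}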
virtual void process()
{
    CMasterActivity::process();
    IHThorDistributionArg *helper = (IHThorDistributionArg *)queryHelper();
    IOutputMetaData *rcSz = helper->queryInternalRecordSize();
    unsigned nslaves = container.queryJob().querySlaves();
    IDistributionTable **result = (IDistributionTable **)createThorRow(rcSz->getMinRecordSize()); // not a real row
    helper->clearAggregate(result);
    while (nslaves--)
    {
        rank_t sender;
        CMessageBuffer msg;
        if (!receiveMsg(msg, RANK_ALL, mpTag, &sender))
            return;
#if THOR_TRACE_LEVEL >= 5
        ActPrintLog("Received distribution result from node %d", (unsigned)sender);
#endif
        if (msg.length())
            helper->merge(result, msg);
    }
    StringBuffer tmp;
    tmp.append("<XML>");
    helper->gatherResult(result, tmp);
    tmp.append("</XML>");
#if THOR_TRACE_LEVEL >= 5
    ActPrintLog("Distribution result: %s", tmp.str());
#endif
    helper->sendResult(tmp.length(), tmp.str());
    destroyThorRow(result);
}
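// Note on process() above: "result" is not a real Thor row, it is an array of
// IDistributionTable pointers sized by the helper's internal record meta, hence the
// explicit createThorRow/destroyThorRow pairing rather than normal row lifetime
// management. The loop gathers one partial result per slave in arrival order (via
// RANK_ALL) and folds each into "result" with helper->merge, before serializing the
// combined distribution as an <XML> text block back via helper->sendResult.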
virtual void init()
{
    CMasterActivity::init();
    OwnedRoxieString helperFileName = helper->getFileName();
    StringBuffer expandedFileName;
    queryThorFileManager().addScope(container.queryJob(), helperFileName, expandedFileName, false);
    fileName.set(expandedFileName);
    dlfn.set(fileName);
    isLocal = 0 != (TIWlocal & helper->getFlags());
    IOutputMetaData *diskSize = helper->queryDiskRecordSize();
    unsigned minSize = diskSize->getMinRecordSize();
    if (minSize > KEYBUILD_MAXLENGTH)
        throw MakeActivityException(this, 0, "Index minimum record length (%d) exceeds %d internal limit", minSize, KEYBUILD_MAXLENGTH);
    unsigned maxSize;
    if (diskSize->isVariableSize())
    {
        if (TIWmaxlength & helper->getFlags())
            maxSize = helper->getMaxKeySize();
        else
            maxSize = KEYBUILD_MAXLENGTH; // Current default behaviour, could be improved in the future
    }
    else
        maxSize = diskSize->getFixedSize();
    if (maxSize > KEYBUILD_MAXLENGTH)
        throw MakeActivityException(this, 0, "Index maximum record length (%d) exceeds %d internal limit. Minimum size = %d, try adjusting index MAXLENGTH", maxSize, KEYBUILD_MAXLENGTH, minSize);
    singlePartKey = 0 != (helper->getFlags() & TIWsmall) || dlfn.isExternal();
    clusters.kill();
    unsigned idx = 0;
    while (true)
    {
        OwnedRoxieString cluster(helper->getCluster(idx));
        if (!cluster)
            break;
        clusters.append(cluster);
        idx++;
    }
    IArrayOf<IGroup> groups;
    if (singlePartKey)
    {
        isLocal = true;
        buildTlk = false;
    }
    else if (!isLocal || globals->getPropBool("@buildLocalTlks", true))
        buildTlk = true;
    fillClusterArray(container.queryJob(), fileName, clusters, groups);
    unsigned restrictedWidth = 0;
    if (TIWhaswidth & helper->getFlags())
    {
        restrictedWidth = helper->getWidth();
        if (restrictedWidth > container.queryJob().querySlaves())
            throw MakeActivityException(this, 0, "Unsupported, can't refactor to width(%d) larger than host cluster(%d)", restrictedWidth, container.queryJob().querySlaves());
        else if (restrictedWidth < container.queryJob().querySlaves())
        {
            if (!isLocal)
                throw MakeActivityException(this, 0, "Unsupported, refactoring to few parts only supported for local indexes.");
            assertex(!singlePartKey);
            unsigned gwidth = groups.item(0).ordinality();
            if (0 != container.queryJob().querySlaves() % gwidth)
                throw MakeActivityException(this, 0, "Unsupported, refactored target size (%d) must be factor of thor cluster width (%d)", groups.item(0).ordinality(), container.queryJob().querySlaves());
            if (0 == restrictedWidth)
                restrictedWidth = gwidth;
            ForEachItemIn(g, groups)
            {
                IGroup &group = groups.item(g);
                if (gwidth != groups.item(g).ordinality())
                    throw MakeActivityException(this, 0, "Unsupported, cannot output multiple refactored widths, targeting cluster '%s' and '%s'", clusters.item(0), clusters.item(g));
                if (gwidth != restrictedWidth)
                    groups.replace(*group.subset((unsigned)0, restrictedWidth), g);
            }
            refactor = true;
        }
    }
}
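// Note on the width-refactoring rules in init() above: a restricted width is only
// accepted for local indexes, and the target group width must divide the Thor cluster
// width evenly. For example, on an 8-way cluster a local index can be refactored to a
// group width of 1, 2 or 4, but a width of 3 fails the (querySlaves() % gwidth) test
// and raises the "must be factor of thor cluster width" activity exception.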
void CDiskWriteSlaveActivityBase::open()
{
    if (dlfn.isExternal() && !firstNode())
    {
        input.setown(createDataLinkSmartBuffer(this, inputs.item(0), PROCESS_SMART_BUFFER_SIZE, isSmartBufferSpillNeeded(this), grouped, RCUNBOUND, NULL, false, &container.queryJob().queryIDiskUsage()));
        startInput(input);
        if (!rfsQueryParallel)
        {
            ActPrintLog("Blocked, waiting for previous part to complete write");
            CMessageBuffer msg;
            if (!receiveMsg(msg, queryJobChannel().queryMyRank()-1, mpTag))
                return;
            rowcount_t prevRows;
            msg.read(prevRows);
            msg.read(tempExternalName); // reuse temp filename, last node will rename
            ActPrintLog("Previous write row count = %" RCPF "d", prevRows);
        }
    }
    else
    {
        input.set(inputs.item(0));
        startInput(input);
    }
    processed = THORDATALINK_STARTED;
    bool extend = 0 != (diskHelperBase->getFlags() & TDWextend);
    if (extend)
        ActPrintLog("Extending file %s", fName.get());
    /* Fixed length record size is used when outputting compressed stream to determine run-length compression vs default LZW compression.
     * NB: only for FLAT files, not CSV or XML */
    size32_t diskRowMinSz = 0;
    IOutputMetaData *diskRowMeta = diskHelperBase->queryDiskRecordSize()->querySerializedDiskMeta();
    if (diskRowMeta->isFixedSize() && (TAKdiskwrite == container.getKind()))
    {
        diskRowMinSz = diskRowMeta->getMinRecordSize();
        if (grouped)
            diskRowMinSz += 1;
    }
    calcFileCrc = true;
    bool external = dlfn.isExternal();
    bool query = dlfn.isQuery();
    if (query && compress)
        UNIMPLEMENTED;
    unsigned twFlags = external ? TW_External : 0;
    if (query || (external && !firstNode()))
        twFlags |= TW_Direct;
    if (!external || (!query && lastNode()))
        twFlags |= TW_RenameToPrimary;
    if (extend || (external && !query))
        twFlags |= TW_Extend;
    Owned<IFileIO> iFileIO = createMultipleWrite(this, *partDesc, diskRowMinSz, twFlags, compress, ecomp, this, &abortSoon, (external && !query) ? &tempExternalName : NULL);
    if (compress)
    {
        ActPrintLog("Performing row compression on output file: %s", fName.get());
        // NB: block compressed output has implicit crc of 0, no need to calculate in row writer.
        calcFileCrc = false;
    }
    Owned<IFileIOStream> stream;
    if (wantRaw())
    {
        outraw.setown(createBufferedIOStream(iFileIO));
        stream.set(outraw);
    }
    else
    {
        stream.setown(createIOStream(iFileIO));
        unsigned rwFlags = 0;
        if (grouped)
            rwFlags |= rw_grouped;
        if (calcFileCrc)
            rwFlags |= rw_crc;
        out.setown(createRowWriter(stream, ::queryRowInterfaces(input), rwFlags));
    }
    if (extend || (external && !query))
        stream->seek(0, IFSend);
    ActPrintLog("Created output stream for %s", fName.get());
}
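// Note on the twFlags logic in open() above, as the code reads: the write mode is
// derived from three properties of the target (external, query, node position):
//   TW_Direct          - query targets, or external targets on every node but the
//                        first (each node appends its own part directly)
//   TW_RenameToPrimary - normal DFS writes, or the last node of a non-query external
//                        write, which performs the final rename
//   TW_Extend          - explicit TDWextend requests and non-query external writes,
//                        paired with the seek(0, IFSend) before writing begins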
void CWriteMasterBase::publish()
{
    if (published)
        return;
    published = true;
    if (!(diskHelperBase->getFlags() & (TDXtemporary|TDXjobtemp)))
        updateActivityResult(container.queryJob().queryWorkUnit(), diskHelperBase->getFlags(), diskHelperBase->getSequence(), fileName, recordsProcessed);
    IPropertyTree &props = fileDesc->queryProperties();
    props.setPropInt64("@recordCount", recordsProcessed);
    if (0 == (diskHelperBase->getFlags() & TDXtemporary) || container.queryJob().queryUseCheckpoints())
    {
        if (0 != (diskHelperBase->getFlags() & TDWexpires))
            setExpiryTime(props, diskHelperBase->getExpiryDays());
        if (TDWupdate & diskHelperBase->getFlags())
        {
            unsigned eclCRC;
            unsigned __int64 totalCRC;
            diskHelperBase->getUpdateCRCs(eclCRC, totalCRC);
            props.setPropInt("@eclCRC", eclCRC);
            props.setPropInt64("@totalCRC", totalCRC);
        }
    }
    container.queryTempHandler()->registerFile(fileName, container.queryOwner().queryGraphId(), diskHelperBase->getTempUsageCount(), TDXtemporary & diskHelperBase->getFlags(), getDiskOutputKind(diskHelperBase->getFlags()), &clusters);
    if (!dlfn.isExternal())
    {
        bool temporary = 0 != (diskHelperBase->getFlags()&TDXtemporary);
        if (!temporary && (queryJob().querySlaves() < fileDesc->numParts()))
        {
            // create empty parts for a fileDesc being published that is larger than this cluster
            size32_t recordSize = 0;
            IOutputMetaData *diskRowMeta = diskHelperBase->queryDiskRecordSize()->querySerializedDiskMeta();
            if (diskRowMeta->isFixedSize() && (TAKdiskwrite == container.getKind()))
            {
                recordSize = diskRowMeta->getMinRecordSize();
                if (0 != (diskHelperBase->getFlags() & TDXgrouped))
                    recordSize += 1;
            }
            unsigned compMethod = COMPRESS_METHOD_LZW;
            // rowdiff used if recordSize > 0, else fallback to compMethod
            if (getOptBool(THOROPT_COMP_FORCELZW, false))
            {
                recordSize = 0; // by default if fixed length (recordSize set), row diff compression is used. This forces compMethod.
                compMethod = COMPRESS_METHOD_LZW;
            }
            else if (getOptBool(THOROPT_COMP_FORCEFLZ, false))
                compMethod = COMPRESS_METHOD_FASTLZ;
            else if (getOptBool(THOROPT_COMP_FORCELZ4, false))
                compMethod = COMPRESS_METHOD_LZ4;
            bool blockCompressed;
            bool compressed = fileDesc->isCompressed(&blockCompressed);
            for (unsigned clusterIdx=0; clusterIdx<fileDesc->numClusters(); clusterIdx++)
            {
                StringBuffer clusterName;
                fileDesc->getClusterGroupName(clusterIdx, clusterName, &queryNamedGroupStore());
                PROGLOG("Creating blank parts for file '%s', cluster '%s'", fileName.get(), clusterName.str());
                unsigned p=0;
                while (p<fileDesc->numParts())
                {
                    if (p == targetOffset)
                        p += queryJob().querySlaves();
                    IPartDescriptor *partDesc = fileDesc->queryPart(p);
                    CDateTime createTime, modifiedTime;
                    for (unsigned c=0; c<partDesc->numCopies(); c++)
                    {
                        RemoteFilename rfn;
                        partDesc->getFilename(c, rfn);
                        StringBuffer path;
                        rfn.getPath(path);
                        try
                        {
                            ensureDirectoryForFile(path.str());
                            OwnedIFile iFile = createIFile(path.str());
                            Owned<IFileIO> iFileIO;
                            if (compressed)
                                iFileIO.setown(createCompressedFileWriter(iFile, recordSize, false, true, NULL, compMethod));
                            else
                                iFileIO.setown(iFile->open(IFOcreate));
                            dbgassertex(iFileIO.get());
                            iFileIO.clear();
                            // ensure copies have matching datestamps, as they would do normally (backupnode expects it)
                            if (partDesc->numCopies() > 1)
                            {
                                if (0 == c)
                                    iFile->getTime(&createTime, &modifiedTime, NULL);
                                else
                                    iFile->setTime(&createTime, &modifiedTime, NULL);
                            }
                        }
                        catch (IException *e)
                        {
                            if (0 == c)
                                throw;
                            Owned<IThorException> e2 = MakeThorException(e);
                            e->Release();
                            e2->setAction(tea_warning);
                            queryJob().fireException(e2);
                        }
                    }
                    partDesc->queryProperties().setPropInt64("@size", 0);
                    p++;
                }
            }
        }
        queryThorFileManager().publish(container.queryJob(), fileName, *fileDesc, NULL);
    }
}
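// Note on publish() above: blank filler parts are only materialized when the published
// fileDesc has more parts than this cluster has slaves (e.g. publishing for a wider
// target cluster). Compression selection follows option precedence as the code reads:
// THOROPT_COMP_FORCELZW first (which also zeroes recordSize to disable row-diff
// compression), then THOROPT_COMP_FORCEFLZ, then THOROPT_COMP_FORCELZ4; otherwise a
// non-zero fixed recordSize selects row-diff compression in createCompressedFileWriter,
// with COMPRESS_METHOD_LZW remaining the fallback.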