/**
 * Construct the base disk-read slave activity for a graph element.
 *
 * Caches the disk-read helper and initialises the flags that the constructor
 * derives from the helper flags, the global configuration and the activity
 * options (reInit, crcCheckCompressed, markStart, gotMeta, checkFileCrc).
 */
CDiskReadSlaveActivityBase::CDiskReadSlaveActivityBase(CGraphElementBase *_container)
    : CSlaveActivity(_container)
{
    helper = (IHThorDiskReadBaseArg *)queryHelper();

    // reInit is set when the helper reports a variable or dynamic filename.
    const unsigned helperFlags = helper->getFlags();
    reInit = (helperFlags & (TDXvarfilename|TDXdynamicfilename)) != 0;

    crcCheckCompressed = getOptBool(THOROPT_READCOMPRESSED_CRC, false);

    gotMeta = false;
    markStart = false;

    // The global "Debug/@fileCrcDisabled" setting supplies the default,
    // which THOROPT_READ_CRC may then override.
    const bool crcCheckDefault = !globals->getPropBool("Debug/@fileCrcDisabled", false);
    checkFileCrc = getOptBool(THOROPT_READ_CRC, crcCheckDefault);
}
/** * Build final CHP files */ void DmetEngine::generateChpFiles() { DmetCHPWriter engine; AnalysisInfo info; fillInAnalysisInfo(info); engine.setAnalysisInfo(info); vector<string> celFiles = getOptVector("cels"); engine.setOpt("cels",celFiles); engine.setOpt("out-dir",Fs::join(getOpt("out-dir"),"chp")); engine.setOpt("verbose",getOpt("verbose")); engine.setOpt("set-analysis-name", getOpt("set-analysis-name")); engine.setOpt("exec-guid", getOpt("exec-guid")); engine.setOpt("program-name", getOpt("program-name")); engine.setOpt("program-company", getOpt("program-company")); engine.setOpt("program-version", getOpt("program-version")); engine.setOpt("batch-name", getOpt("batch-name")); if (getOptBool("run-cn-engine")) { engine.setOpt("a5-copynumber", Fs::join(getOpt("out-dir"), "adc", getOpt("set-analysis-name") + ".copynumber.a5")); } engine.setOpt("a5-summaries", Fs::join(getOpt("out-dir"), "apg", getOpt("set-analysis-name") + ".summary.a5")); engine.setOpt("a5-calls", Fs::join(getOpt("out-dir"), "apg", getOpt("set-analysis-name") + ".calls.a5")); engine.setOpt("a5-forced-calls", Fs::join(getOpt("out-dir"), "apg", getOpt("set-analysis-name") + ".forced-calls.a5")); engine.setOpt("a5-confidences", Fs::join(getOpt("out-dir"), "apg", getOpt("set-analysis-name") + ".confidences.a5")); engine.setOpt("a5-context", Fs::join(getOpt("out-dir"), "apg", getOpt("set-analysis-name") + ".context.a5")); engine.setOpt("report-file", Fs::join(getOpt("out-dir"), "apg", getOpt("set-analysis-name") + ".report.txt")); engine.setOpt("spf-file",getOpt("spf-file")); engine.setOpt("cdf-file",getOpt("cdf-file")); engine.setOpt("region-model", getOpt("region-model")); engine.setOpt("null-context", getOpt("null-context")); engine.setOpt("geno-call-thresh", getOpt("geno-call-thresh")); engine.setOpt("call-coder-max-alleles", getOpt("call-coder-max-alleles")); engine.setOpt("call-coder-type", getOpt("call-coder-type")); engine.setOpt("call-coder-version", getOpt("call-coder-version")); 
engine.run(); }
/** * compute CHP files */ void DmetEngine::runImp() { setOpt("analysis-guid", affxutil::Guid::GenerateNewGuid()); string errMsg; if(!Fs::isWriteableDir(getOpt("out-dir"))) if(Fs::mkdirPath(getOpt("out-dir"), false) != APT_OK) Err::errAbort("Can't make or write to directory: " + getOpt("out-dir")); if(getOptBool("run-cn-engine")) { Verbose::out(1,""); Verbose::out(1,"Step 1: Computing probeset summaries for copy number state calling"); if(computeCnSummaries()) { Verbose::out(1,""); Verbose::out(1,"Step 2: Computing copy number states"); computeCnState(); setOpt("run-cn-engine","true"); } else { Verbose::out(1,""); Verbose::out(1,"No CN regions to compute. Skipping step 2."); setOpt("run-cn-engine","false"); } } else { Verbose::out(1,""); Verbose::out(1,"Not computing CN state. Skipping steps 1 and 2."); setOpt("run-cn-engine","false"); } Verbose::out(1,""); Verbose::out(1,"Step 3: Computing genotypes"); computeGenotypes(); if(getOptBool("cc-chp-output")) { Verbose::out(1,""); Verbose::out(1,"Step 4: Generating CHP files"); generateChpFiles(); } Verbose::out(1,""); Verbose::out(1, "Done."); }
// IThorDataLink virtual void start() override { ActivityTimer s(totalCycles, timeActivities); PARENT::start(); bool hintunsortedoutput = getOptBool(THOROPT_UNSORTED_OUTPUT, (JFreorderable & helper->getJoinFlags()) != 0); bool hintparallelmatch = getOptBool(THOROPT_PARALLEL_MATCH, hintunsortedoutput); // i.e. unsorted, implies use parallel by default, otherwise no point if (helper->getJoinFlags()&JFlimitedprefixjoin) { CriticalBlock b(joinHelperCrit); // use std join helper (less efficient but implements limited prefix) joinhelper.setown(createJoinHelper(*this, helper, this, hintparallelmatch, hintunsortedoutput)); } else { CriticalBlock b(joinHelperCrit); joinhelper.setown(createSelfJoinHelper(*this, helper, this, hintparallelmatch, hintunsortedoutput)); } strm.setown(isLightweight? doLightweightSelfJoin() : (isLocal ? doLocalSelfJoin() : doGlobalSelfJoin())); assertex(strm); joinhelper->init(strm, NULL, ::queryRowAllocator(queryInput(0)), ::queryRowAllocator(queryInput(0)), ::queryRowMetaData(queryInput(0))); }
void CWriteMasterBase::publish() { if (published) return; published = true; if (!(diskHelperBase->getFlags() & (TDXtemporary|TDXjobtemp))) updateActivityResult(container.queryJob().queryWorkUnit(), diskHelperBase->getFlags(), diskHelperBase->getSequence(), fileName, recordsProcessed); IPropertyTree &props = fileDesc->queryProperties(); props.setPropInt64("@recordCount", recordsProcessed); if (0 == (diskHelperBase->getFlags() & TDXtemporary) || container.queryJob().queryUseCheckpoints()) { if (0 != (diskHelperBase->getFlags() & TDWexpires)) setExpiryTime(props, diskHelperBase->getExpiryDays()); if (TDWupdate & diskHelperBase->getFlags()) { unsigned eclCRC; unsigned __int64 totalCRC; diskHelperBase->getUpdateCRCs(eclCRC, totalCRC); props.setPropInt("@eclCRC", eclCRC); props.setPropInt64("@totalCRC", totalCRC); } } container.queryTempHandler()->registerFile(fileName, container.queryOwner().queryGraphId(), diskHelperBase->getTempUsageCount(), TDXtemporary & diskHelperBase->getFlags(), getDiskOutputKind(diskHelperBase->getFlags()), &clusters); if (!dlfn.isExternal()) { bool temporary = 0 != (diskHelperBase->getFlags()&TDXtemporary); if (!temporary && (queryJob().querySlaves() < fileDesc->numParts())) { // create empty parts for a fileDesc being published that is larger than this clusters size32_t recordSize = 0; IOutputMetaData *diskRowMeta = diskHelperBase->queryDiskRecordSize()->querySerializedDiskMeta(); if (diskRowMeta->isFixedSize() && (TAKdiskwrite == container.getKind())) { recordSize = diskRowMeta->getMinRecordSize(); if (0 != (diskHelperBase->getFlags() & TDXgrouped)) recordSize += 1; } unsigned compMethod = COMPRESS_METHOD_LZW; // rowdiff used if recordSize > 0, else fallback to compMethod if (getOptBool(THOROPT_COMP_FORCELZW, false)) { recordSize = 0; // by default if fixed length (recordSize set), row diff compression is used. This forces compMethod. 
compMethod = COMPRESS_METHOD_LZW; } else if (getOptBool(THOROPT_COMP_FORCEFLZ, false)) compMethod = COMPRESS_METHOD_FASTLZ; else if (getOptBool(THOROPT_COMP_FORCELZ4, false)) compMethod = COMPRESS_METHOD_LZ4; bool blockCompressed; bool compressed = fileDesc->isCompressed(&blockCompressed); for (unsigned clusterIdx=0; clusterIdx<fileDesc->numClusters(); clusterIdx++) { StringBuffer clusterName; fileDesc->getClusterGroupName(clusterIdx, clusterName, &queryNamedGroupStore()); PROGLOG("Creating blank parts for file '%s', cluster '%s'", fileName.get(), clusterName.str()); unsigned p=0; while (p<fileDesc->numParts()) { if (p == targetOffset) p += queryJob().querySlaves(); IPartDescriptor *partDesc = fileDesc->queryPart(p); CDateTime createTime, modifiedTime; for (unsigned c=0; c<partDesc->numCopies(); c++) { RemoteFilename rfn; partDesc->getFilename(c, rfn); StringBuffer path; rfn.getPath(path); try { ensureDirectoryForFile(path.str()); OwnedIFile iFile = createIFile(path.str()); Owned<IFileIO> iFileIO; if (compressed) iFileIO.setown(createCompressedFileWriter(iFile, recordSize, false, true, NULL, compMethod)); else iFileIO.setown(iFile->open(IFOcreate)); dbgassertex(iFileIO.get()); iFileIO.clear(); // ensure copies have matching datestamps, as they would do normally (backupnode expects it) if (partDesc->numCopies() > 1) { if (0 == c) iFile->getTime(&createTime, &modifiedTime, NULL); else iFile->setTime(&createTime, &modifiedTime, NULL); } } catch (IException *e) { if (0 == c) throw; Owned<IThorException> e2 = MakeThorException(e); e->Release(); e2->setAction(tea_warning); queryJob().fireException(e2); } } partDesc->queryProperties().setPropInt64("@size", 0); p++; } clusterIdx++; } } queryThorFileManager().publish(container.queryJob(), fileName, *fileDesc, NULL); } }
/**
 * Fill an AnalysisInfo from the engine's options and state.
 *
 * Sets the fixed header fields, then adds parameters prefixed with "apt-":
 * execution info, the engine options ("apt-opt-*"), optionally the CEL file
 * names and GUIDs of the batch, and the engine state ("apt-state-*").
 * The "cels" option/state is only reported when "batch-info" is on.
 *
 * @param info  structure to fill in
 */
void DmetEngine::fillInAnalysisInfo(AnalysisInfo &info) {
    const string prefix = "apt-";
    vector<string> celFiles = getOptVector("cels");

    info.m_AlgVersion = "3.0";
    info.m_AlgName = getOpt("set-analysis-name");
    info.m_ProgramName = getOpt("program-name");
    info.m_ProgramVersion = getOpt("version-to-report");
    info.m_ProgramCompany = getOpt("program-company");
    info.m_ChipType = getOpt("chip-type");
    info.m_ProgID = "";
    info.m_ExecGuid = getOpt("exec-guid");
    info.m_AnalysisGuid = getOpt("analysis-guid");

    // Execution info
    info.addParam("apt-engine", "DmetEngine");
    info.addParam(prefix + "program-name", getOpt("program-name"));
    info.addParam(prefix + "command-line", getOpt("command-line"));
    info.addParam(prefix + "exec-guid", getOpt("exec-guid"));
    info.addParam(prefix + "analysis-guid", getOpt("analysis-guid"));
    info.addParam(prefix + "time-str", getOpt("time-start"));
    info.addParam(prefix + "version", getOpt("version-to-report"));
    info.addParam(prefix + "cvs-id", getOpt("program-cvs-id"));

    const bool reportBatch = getOptBool("batch-info");

    // Engine Options
    const string optPrefix = prefix + "opt-";
    vector<string> initialOptionNames;
    getOptionNames(initialOptionNames,1);
    for(size_t idx = 0; idx < initialOptionNames.size(); ++idx) {
        string name = initialOptionNames[idx];
        if(name == "cels" && !reportBatch)
            continue;
        info.addParam(optPrefix + name, makeVal(name,1));
    }

    // Cel files in batch
    if(reportBatch) {
        for(uint32_t i = 0; i < celFiles.size(); i++) {
            const std::string celNumber = ToStr(i+1);
            info.addParam(optPrefix + "cel-" + celNumber, Fs::basename(celFiles[i]));

            affymetrix_fusion_io::FusionCELData cel;
            try {
                cel.SetFileName(celFiles[i].c_str());
                if(!cel.ReadHeader()) {
                    Err::errAbort("Unable to read CEL file " + celFiles[i]);
                }
                // The file GUID is added only when generic data is available.
                affymetrix_calvin_io::GenericData *gdata = cel.GetGenericData();
                if (gdata != NULL) {
                    info.addParam(optPrefix + "cel-guid-" + celNumber,
                                  gdata->Header().GetGenericDataHdr()->GetFileId());
                }
                cel.Close();
            }
            catch (...) {
                Err::errAbort("Unable to read CEL file " + celFiles[i]);
            }
        }
    }

    // Engine State
    const string statePrefix = prefix + "state-";
    std::vector<std::string> optionNames;
    getOptionNames(optionNames);
    for(size_t idx = 0; idx < optionNames.size(); ++idx) {
        string name = optionNames[idx];
        if(name == "cels" && !reportBatch)
            continue;
        info.addParam(statePrefix + name, makeVal(name));
    }

    // Sanity check
    Err::check(info.m_ParamValues.size() == info.m_ParamNames.size(),
               "AnalysisInfo - Names and values out of sync.");
}
/** * Compute genotypes */ void DmetEngine::computeGenotypes() { ProbesetGenotypeEngine engine; vector<string> celFiles = getOptVector("cels"); engine.setOpt("cels",celFiles); vector<string> chipTypes = getOptVector("chip-type"); if(chipTypes.size() > 0) engine.setOpt("chip-type",chipTypes); engine.setOpt("cdf-file",getOpt("cdf-file")); engine.setOpt("spf-file",getOpt("spf-file")); string analysis; if(getOpt("gt-analysis") != "") { analysis = getOpt("gt-analysis"); } else { ///@todo If we have a reference, set analysis and qmethod-spec based on /// what was used to build the reference if(getOpt("reference-output") != "") { analysis = "quant-norm.sketch=50000,pm-only,brlmm-p-multi.CM=1.bins=100.mix=1.bic=2.lambda=1.0.HARD=3.SB=0.75.KX=0.3.KH=0.3.KXX=0.1.KAH=-0.1.KHB=-0.1.KYAH=-0.05.KYHB=-0.05.KYAB=-0.1.transform=MVA.AAM=2.8.BBM=-2.8.AAV=0.10.BBV=0.10.ABV=0.10.V=1.AAY=10.7.ABY=11.3.BBY=10.7.copyqc=0.00000.wobble=0.05.MS=0.1.copytype=-1.clustertype=2.CSepPen=0.5.ocean=0.0000001"; } else { analysis = "quant-norm.sketch=50000,pm-only,brlmm-p-multi.CM=2.bins=100.mix=1.bic=2.lambda=1.0.HARD=3.SB=0.75.KX=0.3.KH=0.3.KXX=0.1.KAH=-0.1.KHB=-0.1.KYAH=-0.05.KYHB=-0.05.KYAB=-0.1.transform=MVA.AAM=2.8.BBM=-2.8.AAV=0.10.BBV=0.10.ABV=0.10.V=1.AAY=10.7.ABY=11.3.BBY=10.7.copyqc=0.00000.wobble=0.05.MS=0.1.copytype=-1.clustertype=2.CSepPen=0.5.ocean=0.0000001"; } analysis += ".cc-alleles=6"; analysis += ".cc-type=UCHAR"; analysis += ".cc-version=1.0"; } if(getOpt("gt-qmethod-spec") != "") engine.setOpt("qmethod-spec",getOpt("gt-qmethod-spec")); else engine.setOpt("qmethod-spec","plier.optmethod=1.FixFeatureEffect=true"); analysis = setPra(analysis,getOptInt("pra-thresh")); engine.setOpt("analysis",analysis); engine.setOpt("out-dir",Fs::join(getOpt("out-dir"),"apg")); engine.setOpt("verbose",getOpt("verbose")); engine.setOpt("force",getOpt("force")); engine.setOpt("cc-chp-output","false"); engine.setOpt("probeset-ids-reported",getOpt("probeset-ids-reported")); 
engine.setOpt("probeset-ids",getOpt("probeset-ids")); engine.setOpt("temp-dir", getOpt("temp-dir")); engine.setOpt("use-disk", getOpt("use-disk")); engine.setOpt("disk-cache", getOpt("disk-cache")); engine.setOpt("set-analysis-name", getOpt("set-analysis-name")); engine.setOpt("call-coder-max-alleles", getOpt("call-coder-max-alleles")); engine.setOpt("call-coder-type", getOpt("call-coder-type")); engine.setOpt("call-coder-version", getOpt("call-coder-version")); engine.setOpt("command-line", getOpt("command-line")); engine.setOpt("exec-guid", getOpt("exec-guid")); engine.setOpt("program-name", getOpt("program-name")); engine.setOpt("program-company", getOpt("program-company")); engine.setOpt("program-version", getOpt("program-version")); engine.setOpt("program-cvs-id", getOpt("program-cvs-id")); engine.setOpt("version-to-report", getOpt("version-to-report")); engine.setOpt("prior-size", "1"); engine.setOpt("special-snps", getOpt("special-snps")); engine.setOpt("em-gender", "false"); if(getOpt("chrX-probes") != "") { engine.setOpt("chrX-probes", getOpt("chrX-probes")); engine.setOpt("chrY-probes", getOpt("chrY-probes")); engine.setOpt("set-gender-method", "cn-probe-chrXY-ratio"); engine.setOpt("no-gender-force", "false"); } else { engine.setOpt("set-gender-method", "none"); engine.setOpt("no-gender-force", "true"); } engine.setOpt("female-thresh", getOpt("female-thresh")); engine.setOpt("male-thresh", getOpt("male-thresh")); engine.setOpt("table-output", "false"); engine.setOpt("output-context", "true"); engine.setOpt("output-forced-calls", "true"); if (getOptBool("run-cn-engine") && getOpt("cn-region-gt-probeset-file") != "") { /// @todo perhaps we should use state to track the file name /// as it is we need to keep this in sync with the cn engine call engine.setOpt("genotype-markers-cn-file", Fs::join(getOpt("out-dir"), "adc", getOpt("set-analysis-name") + ".gt.markers.cn.call.a5")); } engine.setOpt("a5-global-file",getOpt("reference-output")); 
engine.setOpt("a5-global-file-no-replace","true"); engine.setOpt("a5-group","ProbesetGenotypeEngine"); engine.setOpt("a5-calls","true"); engine.setOpt("a5-summaries","true"); engine.setOpt("a5-feature-effects","true"); engine.setOpt("a5-sketch","true"); engine.setOpt("a5-write-models","true"); if(getOpt("reference-output") != "") { engine.setOpt("a5-sketch-use-global","true"); engine.setOpt("a5-feature-effects-use-global","true"); engine.setOpt("a5-write-models-use-global","true"); } engine.setOpt("a5-global-input-file",getOpt("reference-input")); engine.setOpt("a5-input-group","ProbesetGenotypeEngine"); if(getOpt("reference-input") != "") { engine.setOpt("a5-sketch-input-global","true"); engine.setOpt("a5-feature-effects-input-global","true"); engine.setOpt("a5-models-input-global","true"); } if(m_ArgvPosAPG > 0) engine.parseArgv(m_argv, m_ArgvPosAPG); engine.run(); }
/** * Make sure that our options are sane. Call Err::errAbort if not. */ void DmetEngine::checkOptionsImp() { defineStates(); setLibFileOpt("cdf-file"); setLibFileOpt("spf-file"); setLibFileOpt("special-snps"); setLibFileOpt("chrX-probes"); setLibFileOpt("chrY-probes"); setLibFileOpt("reference-input"); setLibFileOpt("probeset-ids"); setLibFileOpt("probeset-ids-reported"); setLibFileOpt("region-model"); setLibFileOpt("probeset-model"); setLibFileOpt("cn-region-gt-probeset-file"); if (getOpt("out-dir") == "") {Err::errAbort("Must specify an output directory.");} if (getOpt("temp-dir") == "") { setOpt("temp-dir", Fs::join(getOpt("out-dir"),"temp")); } string cdfFile = getOpt("cdf-file"); string spfFile = getOpt("spf-file"); string specialSnps = getOpt("special-snps"); string chrXProbes = getOpt("chrX-probes"); string chrYProbes = getOpt("chrY-probes"); if(getOpt("sample-type") == "plasmid") { setOpt("run-cn-engine","false"); } else { setOpt("run-cn-engine",getOpt("run-cn-engine")); } string refOut = getOpt("reference-output"); string batchName = getOpt("batch-name"); if(refOut == "") { if (batchName.empty() != true) Err::errAbort("You cannot provide a batch-name when running in single sample mode. batch-name is only valid when output-reference is specified."); } else { if (batchName.empty() == true) Err::errAbort("You must define the batch-name parameter"); if(Fs::isReadable(refOut)) if(Fs::rm(refOut, false) != APT_OK) Err::errAbort("Unable to remove existing reference-output file '" + refOut + "'"); } ///@todo check chip type in reference file ///@todo check reference file version /* Read in cel file list from other file if specified. 
*/ vector<string> celFiles; EngineUtil::getCelFiles(celFiles, this); if(celFiles.size() == 0) Err::errAbort("No cel files specified."); setOpt("cels",celFiles); // Build a consent file if vector of markers was passed in vector<string> consented = getOptVector("report"); if(consented.size() > 0) { FsPath probeset_path; probeset_path.setPath(getOpt("out-dir"),"probesets-reported","txt"); //probeset_path.ensureWriteableDirPath(); Fs::ensureWriteableDirPath(getOpt("out-dir", false)); writeProbesetList(probeset_path.asUnixPath(), consented); setOpt("probeset-ids-reported", probeset_path.asUnixPath()); } else { setOpt("probeset-ids-reported", getOpt("probeset-ids-reported")); } if(cdfFile == "" && spfFile == "") Err::errAbort("Must specify either a cdf file or spf (simple probe format) file."); if (chrXProbes != "" && chrYProbes == "") Err::errAbort("Must provide a chrY Probe File when providing a chrX Probe File."); if (chrXProbes == "" && chrYProbes != "") Err::errAbort("Must provide a chrX Probe File when providing a chrY Probe File."); // Check chip types vector<string> chipTypesInLayout; /* Get the intial info about the chip and check cel files to make sure they match. */ colrow_t numRows = 0, numCols = 0; int probeCount=0, probeSetCount=0; if(cdfFile != "") EngineUtil::getCdfChipType(chipTypesInLayout, numRows, numCols, probeCount, probeSetCount, cdfFile); else if(spfFile != "") EngineUtil::getSpfChipType(chipTypesInLayout, numRows, numCols, probeCount, probeSetCount, spfFile); else Err::errAbort("Must specify either a cdf file or spf (simple probe format) file."); setOpt("num-rows", ToStr(numRows)); setOpt("num-cols", ToStr(numCols)); setOpt("probe-count", ToStr(probeCount)); if(chipTypesInLayout.empty() || chipTypesInLayout[0] == "" || probeCount == 0) Err::errAbort("Problem determining ChipType in file: " + ( cdfFile != "" ? cdfFile : spfFile)); /* Did the user "force" a set of chip types via options? 
*/ vector<string> chipTypesSupplied = getOptVector("chip-type"); /* Figure out what chip type to report */ if(chipTypesSupplied.size() > 0) { setOpt("chip-type", chipTypesSupplied); } else if(chipTypesInLayout.size() > 0) { setOpt("chip-type", chipTypesInLayout); } else { Err::errAbort("Unable to figure out a chip type."); } /* Do Chip Type Check */ if(!getOptBool("force")) { vector<string> chipTypeJustPrimary; vector<string> chipTypesToCheck; if(chipTypesSupplied.size() > 0) { chipTypesToCheck = chipTypesSupplied; EngineUtil::checkChipTypeVectors(chipTypesSupplied, chipTypesInLayout); } else { chipTypesToCheck = chipTypesInLayout; } chipTypeJustPrimary.push_back(chipTypesToCheck[0]); EngineUtil::checkCelChipTypes(chipTypesToCheck, probeCount, celFiles, numRows, numCols); // Check special SNPs files if (specialSnps != "") { EngineUtil::checkTsvFileChipType(specialSnps, chipTypeJustPrimary); } // And other files if (chrXProbes != "") { EngineUtil::checkTsvFileChipType(chrXProbes, chipTypesToCheck); } if (chrYProbes != "") { EngineUtil::checkTsvFileChipType(chrYProbes, chipTypesToCheck); } } // end if(!force) }
void CDiskWriteSlaveActivityBase::open() { start(); if (dlfn.isExternal() && !firstNode()) { if (!rfsQueryParallel) { ActPrintLog("Blocked, waiting for previous part to complete write"); CMessageBuffer msg; if (!receiveMsg(msg, queryJobChannel().queryMyRank()-1, mpTag)) return; rowcount_t prevRows; msg.read(prevRows); msg.read(tempExternalName); // reuse temp filename, last node will rename ActPrintLog("Previous write row count = %" RCPF "d", prevRows); } } processed = THORDATALINK_STARTED; bool extend = 0 != (diskHelperBase->getFlags() & TDWextend); if (extend) ActPrintLog("Extending file %s", fName.get()); /* Fixed length record size is used when outputting compressed stream to determine run-length compression vs default LZW compression. * NB: only for FLAT files, not CSV or XML */ size32_t diskRowMinSz = 0; IOutputMetaData *diskRowMeta = diskHelperBase->queryDiskRecordSize()->querySerializedDiskMeta(); if (diskRowMeta->isFixedSize() && (TAKdiskwrite == container.getKind())) { diskRowMinSz = diskRowMeta->getMinRecordSize(); if (grouped) diskRowMinSz += 1; } if (compress) calcFileCrc = getOptBool(THOROPT_WRITECOMPRESSED_CRC, false); else calcFileCrc = getOptBool(THOROPT_WRITE_CRC, true); bool external = dlfn.isExternal(); bool query = dlfn.isQuery(); if (query && compress) UNIMPLEMENTED; unsigned twFlags = external ? TW_External : 0; if (query || (external && !firstNode())) twFlags |= TW_Direct; if (!external || (!query && lastNode())) twFlags |= TW_RenameToPrimary; if (extend||(external&&!query)) twFlags |= TW_Extend; if (diskHelperBase->getFlags() & TDXtemporary) twFlags |= TW_Temporary; { CriticalBlock block(statsCs); outputIO.setown(createMultipleWrite(this, *partDesc, diskRowMinSz, twFlags, compress, ecomp, this, &abortSoon, (external&&!query) ? &tempExternalName : NULL)); } if (compress) { ActPrintLog("Performing compression on output file: %s", fName.get()); // NB: block compressed output has implicit crc of 0, no need to calculate in row writer. 
calcFileCrc = false; } Owned<IFileIOStream> stream; if (wantRaw()) { outraw.setown(createBufferedIOStream(outputIO)); stream.set(outraw); } else { stream.setown(createIOStream(outputIO)); unsigned rwFlags = 0; if (grouped) rwFlags |= rw_grouped; if (calcFileCrc) rwFlags |= rw_crc; out.setown(createRowWriter(stream, ::queryRowInterfaces(input), rwFlags)); } if (extend || (external && !query)) stream->seek(0,IFSend); ActPrintLog("Created output stream for %s, calcFileCrc=%s", fName.get(), calcFileCrc?"true":"false"); }