示例#1
0
/**
 * Construct the disk-read slave activity base.
 *
 * Caches the activity helper as an IHThorDiskReadBaseArg, derives reInit from
 * the helper flags and resolves the CRC-checking switches from the options.
 *
 * @param _container  graph element passed through to CSlaveActivity.
 */
CDiskReadSlaveActivityBase::CDiskReadSlaveActivityBase(CGraphElementBase *_container) : CSlaveActivity(_container)
{
    helper = (IHThorDiskReadBaseArg *)queryHelper();

    // reInit is set when the helper reports a variable or dynamic filename.
    reInit = (helper->getFlags() & (TDXvarfilename|TDXdynamicfilename)) != 0;

    crcCheckCompressed = getOptBool(THOROPT_READCOMPRESSED_CRC, false);

    gotMeta = false;
    markStart = false;

    // The global "Debug/@fileCrcDisabled" property supplies the default,
    // which THOROPT_READ_CRC may then override.
    const bool crcOnByDefault = !globals->getPropBool("Debug/@fileCrcDisabled", false);
    checkFileCrc = getOptBool(THOROPT_READ_CRC, crcOnByDefault);
}
/**
 * Build final CHP files
 */
void DmetEngine::generateChpFiles() {

  DmetCHPWriter engine;

  AnalysisInfo info;
  fillInAnalysisInfo(info);
  engine.setAnalysisInfo(info);

  vector<string> celFiles = getOptVector("cels");
  engine.setOpt("cels",celFiles);
  engine.setOpt("out-dir",Fs::join(getOpt("out-dir"),"chp"));
  engine.setOpt("verbose",getOpt("verbose"));
  engine.setOpt("set-analysis-name", getOpt("set-analysis-name"));
  engine.setOpt("exec-guid", getOpt("exec-guid"));
  engine.setOpt("program-name", getOpt("program-name"));
  engine.setOpt("program-company", getOpt("program-company"));
  engine.setOpt("program-version", getOpt("program-version"));
  engine.setOpt("batch-name", getOpt("batch-name"));

  if (getOptBool("run-cn-engine")) {
    engine.setOpt("a5-copynumber",
                  Fs::join(getOpt("out-dir"), "adc",
                               getOpt("set-analysis-name") + ".copynumber.a5"));
  }

  engine.setOpt("a5-summaries",
                Fs::join(getOpt("out-dir"), "apg",
                             getOpt("set-analysis-name") + ".summary.a5"));
  engine.setOpt("a5-calls", 
                Fs::join(getOpt("out-dir"), "apg",
                             getOpt("set-analysis-name") + ".calls.a5"));
  engine.setOpt("a5-forced-calls", 
                Fs::join(getOpt("out-dir"), "apg", getOpt("set-analysis-name") + ".forced-calls.a5"));
  engine.setOpt("a5-confidences", 
                Fs::join(getOpt("out-dir"), "apg",
                             getOpt("set-analysis-name") + ".confidences.a5"));
  engine.setOpt("a5-context", 
                Fs::join(getOpt("out-dir"), "apg",
                             getOpt("set-analysis-name") + ".context.a5"));
  engine.setOpt("report-file", 
                Fs::join(getOpt("out-dir"), "apg",
                             getOpt("set-analysis-name") + ".report.txt"));

  engine.setOpt("spf-file",getOpt("spf-file"));
  engine.setOpt("cdf-file",getOpt("cdf-file"));
  engine.setOpt("region-model", getOpt("region-model"));
  engine.setOpt("null-context", getOpt("null-context"));
  engine.setOpt("geno-call-thresh", getOpt("geno-call-thresh"));
  engine.setOpt("call-coder-max-alleles", getOpt("call-coder-max-alleles"));
  engine.setOpt("call-coder-type", getOpt("call-coder-type"));
  engine.setOpt("call-coder-version", getOpt("call-coder-version"));

  engine.run();
}
/**
 * Run the engine: optional copy number steps, genotyping, optional CHP output.
 *
 * A fresh "analysis-guid" is generated first and the output directory is
 * created if it is not already a writeable directory (aborting on failure).
 * "run-cn-engine" is rewritten to "true" only when copy number states were
 * actually computed, and to "false" otherwise.
 */
void DmetEngine::runImp() {

    setOpt("analysis-guid", affxutil::Guid::GenerateNewGuid());

    // Only attempt to create the directory when it is not already writeable.
    const bool outDirUsable = Fs::isWriteableDir(getOpt("out-dir"))
                              || !(Fs::mkdirPath(getOpt("out-dir"), false) != APT_OK);
    if (!outDirUsable)
        Err::errAbort("Can't make or write to directory: " + getOpt("out-dir"));

    // Steps 1 and 2: copy number summaries and states.
    bool cnStateComputed = false;
    if (!getOptBool("run-cn-engine")) {
        Verbose::out(1,"");
        Verbose::out(1,"Not computing CN state. Skipping steps 1 and 2.");
    } else {
        Verbose::out(1,"");
        Verbose::out(1,"Step 1: Computing probeset summaries for copy number state calling");
        if (computeCnSummaries()) {
            Verbose::out(1,"");
            Verbose::out(1,"Step 2: Computing copy number states");
            computeCnState();
            cnStateComputed = true;
        } else {
            Verbose::out(1,"");
            Verbose::out(1,"No CN regions to compute. Skipping step 2.");
        }
    }
    // Record whether CN results exist, for the later steps that read this flag.
    setOpt("run-cn-engine", cnStateComputed ? "true" : "false");

    // Step 3: genotyping always runs.
    Verbose::out(1,"");
    Verbose::out(1,"Step 3: Computing genotypes");
    computeGenotypes();

    // Step 4: CHP files only on request.
    if (getOptBool("cc-chp-output")) {
        Verbose::out(1,"");
        Verbose::out(1,"Step 4: Generating CHP files");
        generateChpFiles();
    }

    Verbose::out(1,"");
    Verbose::out(1, "Done.");
}
示例#4
0
// IThorDataLink
    virtual void start() override
    {
        ActivityTimer s(totalCycles, timeActivities);
        PARENT::start();
        bool hintunsortedoutput = getOptBool(THOROPT_UNSORTED_OUTPUT, (JFreorderable & helper->getJoinFlags()) != 0);
        bool hintparallelmatch = getOptBool(THOROPT_PARALLEL_MATCH, hintunsortedoutput); // i.e. unsorted, implies use parallel by default, otherwise no point

        if (helper->getJoinFlags()&JFlimitedprefixjoin)
        {
            CriticalBlock b(joinHelperCrit);
            // use std join helper (less efficient but implements limited prefix)
            joinhelper.setown(createJoinHelper(*this, helper, this, hintparallelmatch, hintunsortedoutput));
        }
        else
        {
            CriticalBlock b(joinHelperCrit);
            joinhelper.setown(createSelfJoinHelper(*this, helper, this, hintparallelmatch, hintunsortedoutput));
        }
        strm.setown(isLightweight? doLightweightSelfJoin() : (isLocal ? doLocalSelfJoin() : doGlobalSelfJoin()));
        assertex(strm);

        joinhelper->init(strm, NULL, ::queryRowAllocator(queryInput(0)), ::queryRowAllocator(queryInput(0)), ::queryRowMetaData(queryInput(0)));
    }
示例#5
0
void CWriteMasterBase::publish()
{
    if (published) return;
    published = true;
    if (!(diskHelperBase->getFlags() & (TDXtemporary|TDXjobtemp)))
        updateActivityResult(container.queryJob().queryWorkUnit(), diskHelperBase->getFlags(), diskHelperBase->getSequence(), fileName, recordsProcessed);

    IPropertyTree &props = fileDesc->queryProperties();
    props.setPropInt64("@recordCount", recordsProcessed);
    if (0 == (diskHelperBase->getFlags() & TDXtemporary) || container.queryJob().queryUseCheckpoints())
    {
        if (0 != (diskHelperBase->getFlags() & TDWexpires))
            setExpiryTime(props, diskHelperBase->getExpiryDays());
        if (TDWupdate & diskHelperBase->getFlags())
        {
            unsigned eclCRC;
            unsigned __int64 totalCRC;
            diskHelperBase->getUpdateCRCs(eclCRC, totalCRC);
            props.setPropInt("@eclCRC", eclCRC);
            props.setPropInt64("@totalCRC", totalCRC);
        }
    }
    container.queryTempHandler()->registerFile(fileName, container.queryOwner().queryGraphId(), diskHelperBase->getTempUsageCount(), TDXtemporary & diskHelperBase->getFlags(), getDiskOutputKind(diskHelperBase->getFlags()), &clusters);
    if (!dlfn.isExternal())
    {
        bool temporary = 0 != (diskHelperBase->getFlags()&TDXtemporary);
        if (!temporary && (queryJob().querySlaves() < fileDesc->numParts()))
        {
            // create empty parts for a fileDesc being published that is larger than this clusters
            size32_t recordSize = 0;
            IOutputMetaData *diskRowMeta = diskHelperBase->queryDiskRecordSize()->querySerializedDiskMeta();
            if (diskRowMeta->isFixedSize() && (TAKdiskwrite == container.getKind()))
            {
                recordSize = diskRowMeta->getMinRecordSize();
                if (0 != (diskHelperBase->getFlags() & TDXgrouped))
                    recordSize += 1;
            }
            unsigned compMethod = COMPRESS_METHOD_LZW;
            // rowdiff used if recordSize > 0, else fallback to compMethod
            if (getOptBool(THOROPT_COMP_FORCELZW, false))
            {
                recordSize = 0; // by default if fixed length (recordSize set), row diff compression is used. This forces compMethod.
                compMethod = COMPRESS_METHOD_LZW;
            }
            else if (getOptBool(THOROPT_COMP_FORCEFLZ, false))
                compMethod = COMPRESS_METHOD_FASTLZ;
            else if (getOptBool(THOROPT_COMP_FORCELZ4, false))
                compMethod = COMPRESS_METHOD_LZ4;
            bool blockCompressed;
            bool compressed = fileDesc->isCompressed(&blockCompressed);
            for (unsigned clusterIdx=0; clusterIdx<fileDesc->numClusters(); clusterIdx++)
            {
                StringBuffer clusterName;
                fileDesc->getClusterGroupName(clusterIdx, clusterName, &queryNamedGroupStore());
                PROGLOG("Creating blank parts for file '%s', cluster '%s'", fileName.get(), clusterName.str());
                unsigned p=0;
                while (p<fileDesc->numParts())
                {
                    if (p == targetOffset)
                        p += queryJob().querySlaves();
                    IPartDescriptor *partDesc = fileDesc->queryPart(p);
                    CDateTime createTime, modifiedTime;
                    for (unsigned c=0; c<partDesc->numCopies(); c++)
                    {
                        RemoteFilename rfn;
                        partDesc->getFilename(c, rfn);
                        StringBuffer path;
                        rfn.getPath(path);
                        try
                        {
                            ensureDirectoryForFile(path.str());
                            OwnedIFile iFile = createIFile(path.str());
                            Owned<IFileIO> iFileIO;
                            if (compressed)
                                iFileIO.setown(createCompressedFileWriter(iFile, recordSize, false, true, NULL, compMethod));
                            else
                                iFileIO.setown(iFile->open(IFOcreate));
                            dbgassertex(iFileIO.get());
                            iFileIO.clear();
                            // ensure copies have matching datestamps, as they would do normally (backupnode expects it)
                            if (partDesc->numCopies() > 1)
                            {
                                if (0 == c)
                                    iFile->getTime(&createTime, &modifiedTime, NULL);
                                else
                                    iFile->setTime(&createTime, &modifiedTime, NULL);
                            }
                        }
                        catch (IException *e)
                        {
                            if (0 == c)
                                throw;
                            Owned<IThorException> e2 = MakeThorException(e);
                            e->Release();
                            e2->setAction(tea_warning);
                            queryJob().fireException(e2);
                        }
                    }
                    partDesc->queryProperties().setPropInt64("@size", 0);
                    p++;
                }
                clusterIdx++;
            }
        }
        queryThorFileManager().publish(container.queryJob(), fileName, *fileDesc, NULL);
    }
}
/**
 * Fill in the analysis header information from this engine's options.
 *
 * Sets the fixed header fields, then appends parameters in this order:
 * execution info ("apt-..."), engine options ("apt-opt-..."), per-CEL-file
 * entries (only when "batch-info" is set) and engine state ("apt-state-...").
 * The "cels" option/state is only reported when "batch-info" is set.
 *
 * Aborts via Err::errAbort if a CEL file header cannot be read, and via
 * Err::check if the parameter name and value lists end up with different
 * sizes.
 *
 * @param info  analysis info to populate; header fields are overwritten and
 *              parameters are added with addParam().
 */
void DmetEngine::fillInAnalysisInfo(AnalysisInfo &info) {
    string prefix = "apt-";

    vector<string> celFiles = getOptVector("cels");
    info.m_AlgVersion = "3.0";
    info.m_AlgName = getOpt("set-analysis-name");
    info.m_ProgramName = getOpt("program-name");
    info.m_ProgramVersion = getOpt("version-to-report");
    info.m_ProgramCompany = getOpt("program-company");
    info.m_ChipType = getOpt("chip-type");
    info.m_ProgID = "";
    info.m_ExecGuid = getOpt("exec-guid");
    info.m_AnalysisGuid = getOpt("analysis-guid");

    // Execution info
    info.addParam("apt-engine", "DmetEngine");
    info.addParam(prefix + "program-name", getOpt("program-name"));
    info.addParam(prefix + "command-line", getOpt("command-line"));
    info.addParam(prefix + "exec-guid", getOpt("exec-guid"));
    info.addParam(prefix + "analysis-guid", getOpt("analysis-guid"));
    info.addParam(prefix + "time-str", getOpt("time-start"));
    info.addParam(prefix + "version", getOpt("version-to-report"));
    info.addParam(prefix + "cvs-id", getOpt("program-cvs-id"));

    // Looked up once: the value is the same for every iteration below.
    const bool batchInfo = getOptBool("batch-info");

    // Engine Options
    string opt = "opt-";
    vector<string> initialOptionNames;
    getOptionNames(initialOptionNames,1);
    for(size_t i = 0; i < initialOptionNames.size(); i++) {
        const string &name = initialOptionNames[i];
        if((name != "cels") || batchInfo){
            info.addParam(prefix + opt + name, makeVal(name,1));
        }
    }

    // Cel files in batch
    if(batchInfo){
        for(uint32_t i = 0; i < celFiles.size(); i++) {
            // Entries are numbered from 1.
            std::string paramName = prefix + opt + "cel-" + ToStr(i+1);
            info.addParam(paramName, Fs::basename(celFiles[i]));

            affymetrix_fusion_io::FusionCELData cel;
            try {
                cel.SetFileName(celFiles[i].c_str());
                if(!cel.ReadHeader()) {
                    Err::errAbort("Unable to read CEL file " + celFiles[i]);
                }
                // The file GUID is only reported when generic data is available.
                affymetrix_calvin_io::GenericData *gdata = cel.GetGenericData();
                if (gdata != NULL)
                {
                    std::string guidParamName = prefix + opt + "cel-guid-" + ToStr(i+1);
                    info.addParam(guidParamName, gdata->Header().GetGenericDataHdr()->GetFileId());
                }
                cel.Close();
            }
            catch (...)
            {
                // Any failure while reading the header is reported uniformly.
                Err::errAbort("Unable to read CEL file " + celFiles[i]);
            }
        }
    }

    // Engine State
    string option = "state-";
    std::vector<std::string> optionNames;
    getOptionNames(optionNames);
    for(size_t i = 0; i < optionNames.size(); i++) {
        const string &name = optionNames[i];
        if((name != "cels") || batchInfo){
            info.addParam(prefix + option + name, makeVal(name));
        }
    }

    // Sanity check
    Err::check(info.m_ParamValues.size() == info.m_ParamNames.size(),
             "AnalysisInfo - Names and values out of sync.");
}
/**
 * Compute genotypes
 */
void DmetEngine::computeGenotypes() {
    ProbesetGenotypeEngine engine;

    vector<string> celFiles = getOptVector("cels");
    engine.setOpt("cels",celFiles);
    vector<string> chipTypes = getOptVector("chip-type");
    if(chipTypes.size() > 0)
        engine.setOpt("chip-type",chipTypes);
    engine.setOpt("cdf-file",getOpt("cdf-file"));
    engine.setOpt("spf-file",getOpt("spf-file"));
    string analysis;
    if(getOpt("gt-analysis") != "") {
        analysis = getOpt("gt-analysis");
    } else {
        ///@todo If we have a reference, set analysis and qmethod-spec based on 
        ///      what was used to build the reference
        if(getOpt("reference-output") != "") {
            analysis = "quant-norm.sketch=50000,pm-only,brlmm-p-multi.CM=1.bins=100.mix=1.bic=2.lambda=1.0.HARD=3.SB=0.75.KX=0.3.KH=0.3.KXX=0.1.KAH=-0.1.KHB=-0.1.KYAH=-0.05.KYHB=-0.05.KYAB=-0.1.transform=MVA.AAM=2.8.BBM=-2.8.AAV=0.10.BBV=0.10.ABV=0.10.V=1.AAY=10.7.ABY=11.3.BBY=10.7.copyqc=0.00000.wobble=0.05.MS=0.1.copytype=-1.clustertype=2.CSepPen=0.5.ocean=0.0000001";
        } else {
            analysis = "quant-norm.sketch=50000,pm-only,brlmm-p-multi.CM=2.bins=100.mix=1.bic=2.lambda=1.0.HARD=3.SB=0.75.KX=0.3.KH=0.3.KXX=0.1.KAH=-0.1.KHB=-0.1.KYAH=-0.05.KYHB=-0.05.KYAB=-0.1.transform=MVA.AAM=2.8.BBM=-2.8.AAV=0.10.BBV=0.10.ABV=0.10.V=1.AAY=10.7.ABY=11.3.BBY=10.7.copyqc=0.00000.wobble=0.05.MS=0.1.copytype=-1.clustertype=2.CSepPen=0.5.ocean=0.0000001";
        }
        analysis += ".cc-alleles=6";
        analysis += ".cc-type=UCHAR";
        analysis += ".cc-version=1.0";
    }
    if(getOpt("gt-qmethod-spec") != "")
        engine.setOpt("qmethod-spec",getOpt("gt-qmethod-spec"));
    else
        engine.setOpt("qmethod-spec","plier.optmethod=1.FixFeatureEffect=true");

    analysis = setPra(analysis,getOptInt("pra-thresh"));
    engine.setOpt("analysis",analysis);
    engine.setOpt("out-dir",Fs::join(getOpt("out-dir"),"apg"));
    engine.setOpt("verbose",getOpt("verbose"));
    engine.setOpt("force",getOpt("force"));
    engine.setOpt("cc-chp-output","false");
    engine.setOpt("probeset-ids-reported",getOpt("probeset-ids-reported"));
    engine.setOpt("probeset-ids",getOpt("probeset-ids"));
    engine.setOpt("temp-dir", getOpt("temp-dir"));
    engine.setOpt("use-disk", getOpt("use-disk"));
    engine.setOpt("disk-cache", getOpt("disk-cache"));
    engine.setOpt("set-analysis-name", getOpt("set-analysis-name"));
    engine.setOpt("call-coder-max-alleles", getOpt("call-coder-max-alleles"));
    engine.setOpt("call-coder-type", getOpt("call-coder-type"));
    engine.setOpt("call-coder-version", getOpt("call-coder-version"));
    engine.setOpt("command-line", getOpt("command-line"));
    engine.setOpt("exec-guid", getOpt("exec-guid"));
    engine.setOpt("program-name", getOpt("program-name"));
    engine.setOpt("program-company", getOpt("program-company"));
    engine.setOpt("program-version", getOpt("program-version"));
    engine.setOpt("program-cvs-id", getOpt("program-cvs-id"));
    engine.setOpt("version-to-report", getOpt("version-to-report"));
    engine.setOpt("prior-size", "1");
    engine.setOpt("special-snps", getOpt("special-snps"));
    engine.setOpt("em-gender", "false");
    if(getOpt("chrX-probes") != "") {
        engine.setOpt("chrX-probes", getOpt("chrX-probes"));
        engine.setOpt("chrY-probes", getOpt("chrY-probes"));
        engine.setOpt("set-gender-method", "cn-probe-chrXY-ratio");
        engine.setOpt("no-gender-force", "false");
    } else {
        engine.setOpt("set-gender-method", "none");
        engine.setOpt("no-gender-force", "true");
    }
    engine.setOpt("female-thresh", getOpt("female-thresh"));
    engine.setOpt("male-thresh", getOpt("male-thresh"));
    engine.setOpt("table-output", "false");
    engine.setOpt("output-context", "true");
    engine.setOpt("output-forced-calls", "true");

    if (getOptBool("run-cn-engine") && getOpt("cn-region-gt-probeset-file") != "") {
      /// @todo perhaps we should use state to track the file name
      ///       as it is we need to keep this in sync with the cn engine call
      engine.setOpt("genotype-markers-cn-file", 
                    Fs::join(getOpt("out-dir"),
                                 "adc",
                                 getOpt("set-analysis-name") + ".gt.markers.cn.call.a5"));
    }

    engine.setOpt("a5-global-file",getOpt("reference-output"));
    engine.setOpt("a5-global-file-no-replace","true");
    engine.setOpt("a5-group","ProbesetGenotypeEngine");
    engine.setOpt("a5-calls","true");
    engine.setOpt("a5-summaries","true");
    engine.setOpt("a5-feature-effects","true");
    engine.setOpt("a5-sketch","true");
    engine.setOpt("a5-write-models","true");
    if(getOpt("reference-output") != "") {
        engine.setOpt("a5-sketch-use-global","true");
        engine.setOpt("a5-feature-effects-use-global","true");
        engine.setOpt("a5-write-models-use-global","true");
    }
  
    engine.setOpt("a5-global-input-file",getOpt("reference-input"));
    engine.setOpt("a5-input-group","ProbesetGenotypeEngine");
    if(getOpt("reference-input") != "") {
        engine.setOpt("a5-sketch-input-global","true");
        engine.setOpt("a5-feature-effects-input-global","true");
        engine.setOpt("a5-models-input-global","true");
    }

    if(m_ArgvPosAPG > 0) 
        engine.parseArgv(m_argv, m_ArgvPosAPG);
    engine.run();
}
/**
 * Make sure that our options are sane. Call Err::errAbort if not.
 *
 * Besides validating, this also writes derived values back with setOpt():
 * "temp-dir" (defaulted to <out-dir>/temp), "run-cn-engine", "cels",
 * "probeset-ids-reported", "num-rows", "num-cols", "probe-count" and
 * "chip-type". It may also touch the filesystem: an existing, readable
 * reference-output file is removed, and a probeset list file is written when
 * the "report" option carries markers.
 *
 * The chip type cross-checks at the end are skipped when "force" is set.
 */
void DmetEngine::checkOptionsImp() {

    defineStates();

    // Library-file handling for the file-valued options
    // (semantics live in setLibFileOpt, which is not visible here).
    setLibFileOpt("cdf-file");
    setLibFileOpt("spf-file");
    setLibFileOpt("special-snps");
    setLibFileOpt("chrX-probes");
    setLibFileOpt("chrY-probes");
    setLibFileOpt("reference-input");
    setLibFileOpt("probeset-ids");
    setLibFileOpt("probeset-ids-reported");
    setLibFileOpt("region-model");
    setLibFileOpt("probeset-model");
	setLibFileOpt("cn-region-gt-probeset-file");

    if (getOpt("out-dir") == "") {Err::errAbort("Must specify an output directory.");}
    // Default the temp directory to <out-dir>/temp.
    if (getOpt("temp-dir") == "") { 
      setOpt("temp-dir", Fs::join(getOpt("out-dir"),"temp"));
    }

    string cdfFile = getOpt("cdf-file");
    string spfFile = getOpt("spf-file");
    string specialSnps = getOpt("special-snps");
    string chrXProbes = getOpt("chrX-probes");
    string chrYProbes = getOpt("chrY-probes");

    // Plasmid samples never run the CN engine; otherwise the current value is written back.
	if(getOpt("sample-type") == "plasmid") { 
		setOpt("run-cn-engine","false"); 
	} else { 
		setOpt("run-cn-engine",getOpt("run-cn-engine"));
	}

    // batch-name must be given exactly when reference-output is given.
    string refOut = getOpt("reference-output");
	string batchName = getOpt("batch-name");
    if(refOut == "") {
		if (batchName.empty() != true)
			Err::errAbort("You cannot provide a batch-name when running in single sample mode. batch-name is only valid when output-reference is specified.");
    } else {
		if (batchName.empty() == true)
			Err::errAbort("You must define the batch-name parameter");
        // Remove an existing reference-output file.
        if(Fs::isReadable(refOut))
            if(Fs::rm(refOut, false) != APT_OK)
                Err::errAbort("Unable to remove existing reference-output file '" + refOut + "'");
	}

    ///@todo check chip type in reference file
    ///@todo check reference file version

    /* Read in cel file list from other file if specified. */
    vector<string> celFiles;
    EngineUtil::getCelFiles(celFiles, this);
    if(celFiles.size() == 0)
        Err::errAbort("No cel files specified.");
    setOpt("cels",celFiles);

    // Build a consent file if vector of markers was passed in
    vector<string> consented = getOptVector("report");
    if(consented.size() > 0) {
      FsPath probeset_path;
      probeset_path.setPath(getOpt("out-dir"),"probesets-reported","txt");
      //probeset_path.ensureWriteableDirPath();
      // NOTE(review): `false` is passed to getOpt() here, not to
      // ensureWriteableDirPath(); other Fs calls in this file take a trailing
      // `false` themselves (mkdirPath, rm) - confirm this placement is intended.
	  Fs::ensureWriteableDirPath(getOpt("out-dir", false));
      writeProbesetList(probeset_path.asUnixPath(), consented);
      setOpt("probeset-ids-reported", probeset_path.asUnixPath());
    } else {
      setOpt("probeset-ids-reported", getOpt("probeset-ids-reported"));
    }

    // A layout file is required, and chrX/chrY probe files must be given together.
    if(cdfFile == "" && spfFile == "")
        Err::errAbort("Must specify either a cdf file or spf (simple probe format) file.");
    if (chrXProbes != "" && chrYProbes == "")
        Err::errAbort("Must provide a chrY Probe File when providing a chrX Probe File.");
    if (chrXProbes == "" && chrYProbes != "")
        Err::errAbort("Must provide a chrX Probe File when providing a chrY Probe File.");

    // Check chip types
    vector<string> chipTypesInLayout;

    /* Get the initial info about the chip and check cel files to make sure
       they match. */
    colrow_t numRows = 0, numCols = 0;
    int probeCount=0, probeSetCount=0;

    // The cdf file takes precedence over the spf file. The final else repeats
    // the check made above, so it is only reached if Err::errAbort returned.
    if(cdfFile != "")
        EngineUtil::getCdfChipType(chipTypesInLayout, numRows, numCols, probeCount, probeSetCount, cdfFile);
    else if(spfFile != "")
        EngineUtil::getSpfChipType(chipTypesInLayout, numRows, numCols, probeCount, probeSetCount, spfFile);
    else
        Err::errAbort("Must specify either a cdf file or spf (simple probe format) file.");

    setOpt("num-rows", ToStr(numRows));
    setOpt("num-cols", ToStr(numCols));
    setOpt("probe-count", ToStr(probeCount));

    if(chipTypesInLayout.empty() || chipTypesInLayout[0] == "" || probeCount == 0) 
        Err::errAbort("Problem determining ChipType in file: " + 
              ( cdfFile != "" ? cdfFile : spfFile));

    /* Did the user "force" a set of chip types via options? */
    vector<string> chipTypesSupplied = getOptVector("chip-type");

    /* Figure out what chip type to report */
    // User-supplied chip types win over those found in the layout file.
    if(chipTypesSupplied.size() > 0) {
        setOpt("chip-type", chipTypesSupplied);
    } else if(chipTypesInLayout.size() > 0) {
        setOpt("chip-type", chipTypesInLayout);
    } else {
        Err::errAbort("Unable to figure out a chip type.");
    }

    /* Do Chip Type Check */
    if(!getOptBool("force")) {
        vector<string> chipTypeJustPrimary;
        vector<string> chipTypesToCheck;

        // Supplied chip types are first checked against the layout's, then used for the file checks.
        if(chipTypesSupplied.size() > 0) {
            chipTypesToCheck = chipTypesSupplied;
            EngineUtil::checkChipTypeVectors(chipTypesSupplied, chipTypesInLayout);
        } else {
            chipTypesToCheck = chipTypesInLayout;
        }

        // Only the first chip type is used for the special SNPs file check.
        chipTypeJustPrimary.push_back(chipTypesToCheck[0]);
        EngineUtil::checkCelChipTypes(chipTypesToCheck, probeCount, celFiles, numRows, numCols);

        // Check special SNPs files
        if (specialSnps != "") {
            EngineUtil::checkTsvFileChipType(specialSnps, chipTypeJustPrimary);
        }
        
        // And other files
        if (chrXProbes != "") {
            EngineUtil::checkTsvFileChipType(chrXProbes, chipTypesToCheck);
        }
        if (chrYProbes != "") {
            EngineUtil::checkTsvFileChipType(chrYProbes, chipTypesToCheck);
        }
    } // end if(!force)
}
示例#9
0
void CDiskWriteSlaveActivityBase::open()
{
    start();
    if (dlfn.isExternal() && !firstNode())
    {
        if (!rfsQueryParallel)
        {
            ActPrintLog("Blocked, waiting for previous part to complete write");
            CMessageBuffer msg;
            if (!receiveMsg(msg, queryJobChannel().queryMyRank()-1, mpTag))
                return;
            rowcount_t prevRows;
            msg.read(prevRows);
            msg.read(tempExternalName); // reuse temp filename, last node will rename
            ActPrintLog("Previous write row count = %" RCPF "d", prevRows);
        }
    }
    processed = THORDATALINK_STARTED;

    bool extend = 0 != (diskHelperBase->getFlags() & TDWextend);
    if (extend)
        ActPrintLog("Extending file %s", fName.get());

    /* Fixed length record size is used when outputting compressed stream to determine run-length compression vs default LZW compression.
     * NB: only for FLAT files, not CSV or XML
     */
    size32_t diskRowMinSz = 0;
    IOutputMetaData *diskRowMeta = diskHelperBase->queryDiskRecordSize()->querySerializedDiskMeta();
    if (diskRowMeta->isFixedSize() && (TAKdiskwrite == container.getKind()))
    {
        diskRowMinSz = diskRowMeta->getMinRecordSize();
        if (grouped)
            diskRowMinSz += 1;
    }

    if (compress)
        calcFileCrc = getOptBool(THOROPT_WRITECOMPRESSED_CRC, false);
    else
        calcFileCrc = getOptBool(THOROPT_WRITE_CRC, true);

    bool external = dlfn.isExternal();
    bool query = dlfn.isQuery();
    if (query && compress)
        UNIMPLEMENTED;

    unsigned twFlags = external ? TW_External : 0;
    if (query || (external && !firstNode()))
        twFlags |= TW_Direct;
    if (!external || (!query && lastNode()))
        twFlags |= TW_RenameToPrimary;
    if (extend||(external&&!query))
        twFlags |= TW_Extend;
    if (diskHelperBase->getFlags() & TDXtemporary)
        twFlags |= TW_Temporary;

    {
        CriticalBlock block(statsCs);
        outputIO.setown(createMultipleWrite(this, *partDesc, diskRowMinSz, twFlags, compress, ecomp, this, &abortSoon, (external&&!query) ? &tempExternalName : NULL));
    }

    if (compress)
    {
        ActPrintLog("Performing compression on output file: %s", fName.get());
        // NB: block compressed output has implicit crc of 0, no need to calculate in row  writer.
        calcFileCrc = false;
    }
    Owned<IFileIOStream> stream;
    if (wantRaw())
    {
        outraw.setown(createBufferedIOStream(outputIO));
        stream.set(outraw);
    }
    else
    {
        stream.setown(createIOStream(outputIO));
        unsigned rwFlags = 0;
        if (grouped)
            rwFlags |= rw_grouped;
        if (calcFileCrc)
            rwFlags |= rw_crc;
        out.setown(createRowWriter(stream, ::queryRowInterfaces(input), rwFlags));
    }
    if (extend || (external && !query))
        stream->seek(0,IFSend);
    ActPrintLog("Created output stream for %s, calcFileCrc=%s", fName.get(), calcFileCrc?"true":"false");
}