// Default constructor.
//
// Gives every option flag, index and pointer member its starting value and
// fills _programNames with the sub-command names this revision recognises.
// The initializers are listed one per line, in the same order as before.
ContextBase::ContextBase()
:
	_program(UNSPECIFIED_PROGRAM),
	_allFilesOpened(false),
	_genomeFile(NULL),
	_outputFileType(FileRecordTypeChecker::UNKNOWN_FILE_TYPE),
	_outputTypeDetermined(false),
	_skipFirstArgs(0),
	_showHelp(false),
	_obeySplits(false),
	_uncompressedBam(false),
	_useBufferedOutput(true),
	_ioBufSize(0),
	_anyHit(false),
	_noHit(false),
	_writeA(false),
	_writeB(false),
	_leftJoin(false),
	_writeCount(false),
	_writeOverlap(false),
	_writeAllOverlap(false),
	_haveFraction(false),
	_overlapFraction(1E-9),
	_reciprocal(false),
	_sameStrand(false),
	_diffStrand(false),
	_sortedInput(false),
	_printHeader(false),
	_printable(true),
	_explicitBedOutput(false),
	_queryFileIdx(-1),
	_databaseFileIdx(-1),
	_bamHeaderAndRefIdx(-1),
	_maxNumDatabaseFields(0),
	_useFullBamTags(false),
	_reportCount(false),
	_reportNames(false),
	_reportScores(false),
	_numOutputRecords(0),
	_hasConstantSeed(false),
	_seed(0),
	_forwardOnly(false),
	_reverseOnly(false),
	_hasColumnOpsMethods(false),
	_keyListOps(NULL),
	_desiredStrand(FileRecordMergeMgr::ANY_STRAND),
	_maxDistance(0),
	_useMergedIntervals(false)
{
	// Sub-command name -> program id lookup table.
	_programNames["intersect"] = INTERSECT;
	_programNames["sample"] = SAMPLE;
	_programNames["map"] = MAP;
	_programNames["merge"] = MERGE;

	// NOTE(review): _hasColumnOpsMethods was just set to false above, so this
	// probably never allocates here -- confirm against hasColumnOpsMethods().
	if (hasColumnOpsMethods()) {
		_keyListOps = new KeyListOps();
	}
}
//for col ops, delimStr will appear between each item in //a collapsed but delimited list. bool ContextBase::handle_delim() { if (!hasColumnOpsMethods()) { return false; } if ((_i+1) < _argc) { _keyListOps->setDelimStr(_argv[_i + 1]); markUsed(_i - _skipFirstArgs); _i++; markUsed(_i - _skipFirstArgs); } return true; }
//for col ops, delimStr will appear between each item in //a collapsed but delimited list. bool ContextBase::handle_delim() { if (!hasColumnOpsMethods()) { _errorMsg = "\n***** ERROR: Can't set delimiter for tools without column operations. Exiting. *****"; return false; } if ((_i+1) < _argc) { _keyListOps->setDelimStr(_argv[_i + 1]); markUsed(_i - _skipFirstArgs); _i++; markUsed(_i - _skipFirstArgs); } return true; }
bool ContextBase::isValidState() { if (!openFiles()) { return false; } if (!cmdArgsValid()) { return false; } if (!determineOutputType()) { return false; } if (_program != GROUP_BY && _files[0]->getRecordType() == FileRecordTypeChecker::NO_POS_PLUS_RECORD_TYPE) { _errorMsg = "ERROR: file "; _errorMsg.append(_files[0]->getFileName()); _errorMsg.append(" has non positional records, which are only valid for \n"); _errorMsg.append(" the groupBy tool. Perhaps you are using a header"); _errorMsg.append(" line(s) that starts with \n"); _errorMsg.append(" something other than \"#\", \"chrom\", or \"chr\" (any case)?"); return false; } if (getObeySplits()) { _splitBlockInfo = new BlockMgr(_overlapFractionA, _reciprocalFraction); } if (hasColumnOpsMethods()) { if (hasIntersectMethods()) { for (int i=0; i < (int)_dbFileIdxs.size(); i++) { FileRecordMgr *dbFile = getFile(_dbFileIdxs[i]); _keyListOps->setDBfileType(dbFile->getFileType()); if (!_keyListOps->isValidColumnOps(dbFile)) { return false; } } } else { FileRecordMgr *dbFile = getFile(0); _keyListOps->setDBfileType(dbFile->getFileType()); if (!_keyListOps->isValidColumnOps(dbFile)) { return false; } } //if user specified a precision, pass it to //keyList ops if (_reportPrecision != -1) { _keyListOps->setPrecision(_reportPrecision); } } return true; }
// Destructor: releases the genome file, closes and deletes every file
// record manager held in _files, and frees the column-ops object when the
// tool has column operations.
ContextBase::~ContextBase()
{
	delete _genomeFile;
	_genomeFile = NULL;

	// Close all files and delete the FRM objects.
	for (size_t i = 0; i < _files.size(); ++i) {
		// Skip slots that were never filled (e.g. opening the inputs stopped
		// part-way); dereferencing NULL here would crash during teardown.
		if (_files[i] == NULL) {
			continue;
		}
		_files[i]->close();
		delete _files[i];
		_files[i] = NULL;
	}

	if (hasColumnOpsMethods()) {
		delete _keyListOps;
		_keyListOps = NULL;
	}
}
bool ContextBase::handle_prec() { if (!hasColumnOpsMethods()) { return false; } if ((_i+1) < _argc) { int prec = atoi(_argv[_i + 1]); if (prec < 1) { _errorMsg += "\n***** ERROR: -prec must be followed by a positive integer. Exiting. *****"; return false; } _reportPrecision = prec; markUsed(_i - _skipFirstArgs); _i++; markUsed(_i - _skipFirstArgs); return true; } _errorMsg += "\n***** ERROR: -prec must be followed by a positive integer. Exiting. *****"; return false; }
bool ContextBase::isValidState() { if (!openFiles()) { return false; } if (!cmdArgsValid()) { return false; } if (!determineOutputType()) { return false; } if (hasColumnOpsMethods()) { FileRecordMgr *dbFile = getFile(hasIntersectMethods() ? _databaseFileIdx : 0); _keyListOps->setDBfileType(dbFile->getFileType()); if (!_keyListOps->isValidColumnOps(dbFile)) { return false; } } return true; }
bool ContextBase::openFiles() { //Make a vector of FileRecordMgr objects by going through the vector //of filenames and opening each one. if (_allFilesOpened) { return true; } if (_fileNames.size() == 0) { //No input was specified. Error and exit. _errorMsg += "\n***** ERROR: No input file given. Exiting. *****"; return false; } _files.resize(_fileNames.size()); for (int i = 0; i < (int)_fileNames.size(); i++) { FileRecordMgr *frm = getNewFRM(_fileNames[i], i); if (hasGenomeFile()) { frm->setGenomeFile(_genomeFile); } //If we're going to do column operations, and an input file // is BAM, we'll need the full flags. if (hasColumnOpsMethods()) { setUseFullBamTags(true); } frm->setFullBamFlags(_useFullBamTags); frm->setIsSorted(_sortedInput); frm->setIoBufSize(_ioBufSize); frm->setIsGroupBy(_program == GROUP_BY); if (!frm->open(_inheader)) { return false; } if (_noEnforceCoordSort) { frm->setNoEnforceCoodSort(true); } _files[i] = frm; } _allFilesOpened = true; return true; }
// Destructor: releases the genome file and the split-block manager, prints
// the chromosome-name-convention warning to stderr if it was tripped, closes
// and deletes every file record manager held in _files, and frees the
// column-ops object when the tool has column operations.
ContextBase::~ContextBase()
{
	delete _genomeFile;
	_genomeFile = NULL;

	delete _splitBlockInfo;
	_splitBlockInfo = NULL;

	if (_nameConventionWarningTripped) {
		cerr << _nameConventionWarningMsg << endl;
	}

	// Close all files and delete the FRM objects.
	for (size_t i = 0; i < _files.size(); ++i) {
		// Skip slots that were never filled (e.g. opening the inputs stopped
		// part-way); dereferencing NULL here would crash during teardown.
		if (_files[i] == NULL) {
			continue;
		}
		_files[i]->close();
		delete _files[i];
		_files[i] = NULL;
	}

	if (hasColumnOpsMethods()) {
		delete _keyListOps;
		_keyListOps = NULL;
	}
}
bool ContextBase::isValidState() { if (!openFiles()) { return false; } if (!cmdArgsValid()) { return false; } if (!determineOutputType()) { return false; } if (getObeySplits()) { _splitBlockInfo = new BlockMgr(_overlapFraction, _reciprocal); } if (hasColumnOpsMethods()) { if (hasIntersectMethods()) { for (int i=0; i < (int)_dbFileIdxs.size(); i++) { FileRecordMgr *dbFile = getFile(_dbFileIdxs[i]); _keyListOps->setDBfileType(dbFile->getFileType()); if (!_keyListOps->isValidColumnOps(dbFile)) { return false; } } } else { FileRecordMgr *dbFile = getFile(0); _keyListOps->setDBfileType(dbFile->getFileType()); if (!_keyListOps->isValidColumnOps(dbFile)) { return false; } } //if user specified a precision, pass it to //keyList ops if (_reportPrecision != -1) { _keyListOps->setPrecision(_reportPrecision); } } return true; }
bool ContextFisher::isValidState() { if (!ContextIntersect::isValidState()) { return false; } // Tests for stranded merge // if (_desiredStrand != FileRecordMergeMgr::ANY_STRAND) { // requested stranded merge for (int i=0; i < getNumInputFiles(); i++) { // make sure file has strand. if (!getFile(i)->recordsHaveStrand()) { _errorMsg = "\n***** ERROR: stranded merge requested, but input file "; _errorMsg += getInputFileName(i); _errorMsg += " does not have strands. *****"; return false; } //make sure file is not VCF. if (getFile(1)->getFileType() == FileRecordTypeChecker::VCF_FILE_TYPE) { _errorMsg = "\n***** ERROR: stranded merge not supported for VCF file "; _errorMsg += getInputFileName(i); _errorMsg += ". *****"; return false; } } } if (_genomeFile == NULL){ _errorMsg = "\nERROR*****: specify -g genome file*****\n"; return false; } //column operations not allowed with BAM input if (hasColumnOpsMethods() && getFile(0)->getFileType() == FileRecordTypeChecker::BAM_FILE_TYPE) { _errorMsg = "\n***** ERROR: stranded merge not supported for VCF files. *****"; return false; } return true; }
// Returns the column-operation result string for keyList, as produced by the
// column-ops object. For tools without column operations, returns _nullStr.
const string &ContextBase::getColumnOpsVal(RecordKeyVector &keyList) const
{
	if (hasColumnOpsMethods()) {
		return _keyListOps->getOpVals(keyList);
	}
	return _nullStr;
}
// Default constructor.
//
// Gives every option flag, index and pointer member its starting value and
// fills _programNames with the sub-command names this revision recognises.
// The initializers keep their original order.
ContextBase::ContextBase()
:
	_program(UNSPECIFIED_PROGRAM),
	// Default-construct the container: initialising it from NULL only
	// compiles when NULL happens to be an integer constant.
	_files(),
	_allFilesOpened(false),
	_genomeFile(NULL),
	_outputFileType(FileRecordTypeChecker::UNKNOWN_FILE_TYPE),
	_outputTypeDetermined(false),
	_skipFirstArgs(0),
	_showHelp(false),
	_obeySplits(false),
	_uncompressedBam(false),
	_useBufferedOutput(true),
	_ioBufSize(0),
	_anyHit(false),
	_noHit(false),
	_writeA(false),
	_writeB(false),
	_leftJoin(false),
	_writeCount(false),
	_writeOverlap(false),
	_writeAllOverlap(false),
	_haveFractionA(false),
	_haveFractionB(false),
	_overlapFractionA(0.0),
	_overlapFractionB(0.0),
	_reciprocalFraction(false),
	_eitherFraction(false),
	_sameStrand(false),
	_diffStrand(false),
	_sortedInput(false),
	_sortOutput(false),
	_reportDBnameTags(false),
	_reportDBfileNames(false),
	_printHeader(false),
	_printable(true),
	_explicitBedOutput(false),
	_queryFileIdx(-1),
	_bamHeaderAndRefIdx(-1),
	_maxNumDatabaseFields(0),
	_useFullBamTags(false),
	_numOutputRecords(0),
	_hasConstantSeed(false),
	_seed(0),
	_forwardOnly(false),
	_reverseOnly(false),
	_nameCheckDisabled(false),
	_hasColumnOpsMethods(false),
	_keyListOps(NULL),
	_desiredStrand(FileRecordMergeMgr::ANY_STRAND),
	_maxDistance(0),
	_useMergedIntervals(false),
	_reportPrecision(-1),	// -1 means "no precision requested"; isValidState() tests for it
	_splitBlockInfo(NULL),
	_allFilesHaveChrInChromNames(UNTESTED),
	_allFileHaveLeadingZeroInChromNames(UNTESTED),
	_noEnforceCoordSort(false),
	_inheader(false),
	_nameConventionWarningTripped(false)
{
	// Sub-command name -> program id lookup table. "sample" used to be
	// registered twice; the redundant second assignment is dropped.
	_programNames["intersect"] = INTERSECT;
	_programNames["sample"] = SAMPLE;
	_programNames["map"] = MAP;
	_programNames["merge"] = MERGE;
	_programNames["closest"] = CLOSEST;
	_programNames["subtract"] = SUBTRACT;
	_programNames["jaccard"] = JACCARD;
	_programNames["spacing"] = SPACING;
	_programNames["fisher"] = FISHER;
	_programNames["coverage"] = COVERAGE;
	_programNames["complement"] = COMPLEMENT;
	_programNames["groupby"] = GROUP_BY;

	// NOTE(review): _hasColumnOpsMethods was just set to false above, so this
	// probably never allocates here -- confirm against hasColumnOpsMethods().
	if (hasColumnOpsMethods()) {
		_keyListOps = new KeyListOps();
	}
}