Beispiel #1
0
// Default constructor.
// Puts every option member named in the initializer list into its default
// ("not set") state and fills _programNames with the sub-command names this
// context recognizes.
ContextBase::ContextBase()
:
  _program(UNSPECIFIED_PROGRAM),
  _allFilesOpened(false),
  _genomeFile(NULL),
  _outputFileType(FileRecordTypeChecker::UNKNOWN_FILE_TYPE),
  _outputTypeDetermined(false),
  _skipFirstArgs(0),
  _showHelp(false),
  _obeySplits(false),
  _uncompressedBam(false),
  _useBufferedOutput(true),
  _ioBufSize(0),
  _anyHit(false),
  _noHit(false),
  _writeA(false),
  _writeB(false),
  _leftJoin(false),
  _writeCount(false),
  _writeOverlap(false),
  _writeAllOverlap(false),
  _haveFraction(false),
  // default overlap fraction used while _haveFraction is false
  _overlapFraction(1E-9),
  _reciprocal(false),
  _sameStrand(false),
  _diffStrand(false),
   _sortedInput(false),
  _printHeader(false),
  _printable(true),
   _explicitBedOutput(false),
  // file indexes start at -1, i.e. "no file assigned yet"
  _queryFileIdx(-1),
  _databaseFileIdx(-1),
  _bamHeaderAndRefIdx(-1),
  _maxNumDatabaseFields(0),
  _useFullBamTags(false),
  _reportCount(false),
  _reportNames(false),
  _reportScores(false),
  _numOutputRecords(0),
  _hasConstantSeed(false),
  _seed(0),
  _forwardOnly(false),
  _reverseOnly(false),
  _hasColumnOpsMethods(false),
  _keyListOps(NULL),
  _desiredStrand(FileRecordMergeMgr::ANY_STRAND),
  _maxDistance(0),
  _useMergedIntervals(false)

{
	// Associate each supported sub-command name with its program identifier.
	_programNames["intersect"] = INTERSECT;
	_programNames["sample"] = SAMPLE;
	_programNames["map"] = MAP;
	_programNames["merge"] = MERGE;

	// NOTE(review): _hasColumnOpsMethods was initialized to false just above.
	// If hasColumnOpsMethods() merely reports that flag, this allocation can
	// never happen from this constructor - confirm against the accessor.
	if (hasColumnOpsMethods()) {
		_keyListOps = new KeyListOps();
	}
}
Beispiel #2
0
// Handles the delimiter option for column operations: the delimiter string
// is what appears between the items of a collapsed, delimited list.
//
// Returns false when this tool has no column operations. Otherwise returns
// true; the delimiter is only consumed (and both the option and its value
// marked as used) when a value actually follows the option.
bool ContextBase::handle_delim()
{
	if (!hasColumnOpsMethods()) {
		return false;
	}

	const bool valueFollows = (_i + 1) < _argc;
	if (!valueFollows) {
		// Nothing after the option: leave everything untouched.
		return true;
	}

	_keyListOps->setDelimStr(_argv[_i + 1]);
	markUsed(_i - _skipFirstArgs);   // the option itself
	++_i;
	markUsed(_i - _skipFirstArgs);   // its value
	return true;
}
Beispiel #3
0
// Handles the delimiter option for column operations: the delimiter string
// is what appears between the items of a collapsed, delimited list.
//
// Returns false (and sets _errorMsg) when this tool has no column
// operations. Otherwise returns true; the delimiter is only consumed (and
// both the option and its value marked as used) when a value follows.
bool ContextBase::handle_delim()
{
	const bool colOpsAvailable = hasColumnOpsMethods();
	if (!colOpsAvailable) {
		_errorMsg = "\n***** ERROR: Can't set delimiter for tools without column operations. Exiting. *****";
		return false;
	}

	if ((_i + 1) >= _argc) {
		// Option given without a value: nothing is consumed.
		return true;
	}

	_keyListOps->setDelimStr(_argv[_i + 1]);
	markUsed(_i - _skipFirstArgs);   // the option itself
	++_i;
	markUsed(_i - _skipFirstArgs);   // its value
	return true;
}
Beispiel #4
0
bool ContextBase::isValidState()
{
	if (!openFiles()) {
		return false;
	}
	if (!cmdArgsValid()) {
		return false;
	}
	if (!determineOutputType()) {
		return false;
	}
	if (_program != GROUP_BY && 
		_files[0]->getRecordType() == FileRecordTypeChecker::NO_POS_PLUS_RECORD_TYPE) 
	{
		_errorMsg = "ERROR: file ";
		_errorMsg.append(_files[0]->getFileName());
		_errorMsg.append(" has non positional records, which are only valid for \n");
		_errorMsg.append(" the groupBy tool. Perhaps you are using a header");
		_errorMsg.append(" line(s) that starts with \n");
		_errorMsg.append(" something other than \"#\", \"chrom\", or \"chr\" (any case)?");
		return false;
	}
	if (getObeySplits()) {
		_splitBlockInfo = new BlockMgr(_overlapFractionA, _reciprocalFraction);
	}
	if (hasColumnOpsMethods()) {

		if (hasIntersectMethods()) {
			for (int i=0; i < (int)_dbFileIdxs.size(); i++) {
				FileRecordMgr *dbFile = getFile(_dbFileIdxs[i]);
				_keyListOps->setDBfileType(dbFile->getFileType());
				if (!_keyListOps->isValidColumnOps(dbFile)) {
					return false;
				}
			}
		} else {
			FileRecordMgr *dbFile = getFile(0);
			_keyListOps->setDBfileType(dbFile->getFileType());
			if (!_keyListOps->isValidColumnOps(dbFile)) {
				return false;
			}
		}
		//if user specified a precision, pass it to
		//keyList ops
		if (_reportPrecision != -1) {
			_keyListOps->setPrecision(_reportPrecision);
		}
	}
	return true;
}
Beispiel #5
0
// Destructor: releases the genome file, closes and deletes every file
// record manager held in _files, and frees the column-operations helper
// when this context has column operations.
ContextBase::~ContextBase()
{
	delete _genomeFile;
	_genomeFile = NULL;

	//close all files and delete FRM objects.
	for (int i=0; i < (int)_files.size(); i++) {
		// A slot can be NULL when the vector was sized but not every file
		// ended up being opened; calling close() on it would dereference NULL.
		if (_files[i] == NULL) {
			continue;
		}
		_files[i]->close();
		delete _files[i];
		_files[i] = NULL;
	}
	if (hasColumnOpsMethods()) {
		delete _keyListOps;
		_keyListOps = NULL;
	}

}
Beispiel #6
0
bool ContextBase::handle_prec()
{
	if (!hasColumnOpsMethods()) {
		return false;
	}
    if ((_i+1) < _argc) {
    	int prec = atoi(_argv[_i + 1]);
    	if (prec < 1) {
    		_errorMsg += "\n***** ERROR: -prec must be followed by a positive integer. Exiting. *****";
    		return false;
    	}
    	 _reportPrecision = prec;
        markUsed(_i - _skipFirstArgs);
        _i++;
        markUsed(_i - _skipFirstArgs);
        return true;
    }
	_errorMsg += "\n***** ERROR: -prec must be followed by a positive integer. Exiting. *****";
    return false;
}
Beispiel #7
0
bool ContextBase::isValidState()
{
	if (!openFiles()) {
		return false;
	}
	if (!cmdArgsValid()) {
		return false;
	}
	if (!determineOutputType()) {
		return false;
	}
	if (hasColumnOpsMethods()) {
		FileRecordMgr *dbFile = getFile(hasIntersectMethods() ? _databaseFileIdx : 0);
		_keyListOps->setDBfileType(dbFile->getFileType());
		if (!_keyListOps->isValidColumnOps(dbFile)) {
			return false;
		}
	}
	return true;
}
Beispiel #8
0
bool ContextBase::openFiles() {

	//Make a vector of FileRecordMgr objects by going through the vector
	//of filenames and opening each one.
	if (_allFilesOpened) {
		return true;
	}

	if (_fileNames.size() == 0) {
		//No input was specified. Error and exit.
		_errorMsg += "\n***** ERROR: No input file given. Exiting. *****";
		return false;
	}

	_files.resize(_fileNames.size());

	for (int i = 0; i < (int)_fileNames.size(); i++) {
		FileRecordMgr *frm = getNewFRM(_fileNames[i], i);
		if (hasGenomeFile()) {
			frm->setGenomeFile(_genomeFile);
		}
		//If we're going to do column operations, and an input file
		// is BAM, we'll need the full flags.
		if (hasColumnOpsMethods()) {
			setUseFullBamTags(true);
		}
		frm->setFullBamFlags(_useFullBamTags);
		frm->setIsSorted(_sortedInput);
		frm->setIoBufSize(_ioBufSize);
		frm->setIsGroupBy(_program == GROUP_BY);
		if (!frm->open(_inheader)) {
			return false;
		}
		if (_noEnforceCoordSort) {
			frm->setNoEnforceCoodSort(true);
		}
		_files[i] = frm;
	}
	_allFilesOpened = true;
	return true;
}
Beispiel #9
0
// Destructor: releases the genome file and the split-block manager, prints
// the chromosome naming-convention warning if one was tripped, closes and
// deletes every file record manager held in _files, and frees the
// column-operations helper when this context has column operations.
ContextBase::~ContextBase()
{
	delete _genomeFile;
	_genomeFile = NULL;
	delete _splitBlockInfo;
	_splitBlockInfo = NULL;

	if (_nameConventionWarningTripped) {
		cerr << _nameConventionWarningMsg << endl;
	}

	//close all files and delete FRM objects.
	for (int i=0; i < (int)_files.size(); i++) {
		// openFiles() sizes _files before opening anything and returns early
		// on the first failure, which leaves the remaining slots NULL.
		// Calling close() on such a slot would dereference NULL.
		if (_files[i] == NULL) {
			continue;
		}
		_files[i]->close();
		delete _files[i];
		_files[i] = NULL;
	}
	if (hasColumnOpsMethods()) {
		delete _keyListOps;
		_keyListOps = NULL;
	}
}
Beispiel #10
0
bool ContextBase::isValidState()
{
	if (!openFiles()) {
		return false;
	}
	if (!cmdArgsValid()) {
		return false;
	}
	if (!determineOutputType()) {
		return false;
	}
	if (getObeySplits()) {
		_splitBlockInfo = new BlockMgr(_overlapFraction, _reciprocal);
	}
	if (hasColumnOpsMethods()) {

		if (hasIntersectMethods()) {
			for (int i=0; i < (int)_dbFileIdxs.size(); i++) {
				FileRecordMgr *dbFile = getFile(_dbFileIdxs[i]);
				_keyListOps->setDBfileType(dbFile->getFileType());
				if (!_keyListOps->isValidColumnOps(dbFile)) {
					return false;
				}
			}
		} else {
			FileRecordMgr *dbFile = getFile(0);
			_keyListOps->setDBfileType(dbFile->getFileType());
			if (!_keyListOps->isValidColumnOps(dbFile)) {
				return false;
			}
		}
		//if user specified a precision, pass it to
		//keyList ops
		if (_reportPrecision != -1) {
			_keyListOps->setPrecision(_reportPrecision);
		}
	}
	return true;
}
Beispiel #11
0
bool ContextFisher::isValidState()
{
	if (!ContextIntersect::isValidState()) {
		return false;
	}
	// Tests for stranded merge
	//
	if (_desiredStrand != FileRecordMergeMgr::ANY_STRAND) { // requested stranded merge
		for (int i=0; i < getNumInputFiles(); i++) {
			// make sure file has strand.
			if (!getFile(i)->recordsHaveStrand()) {
				_errorMsg = "\n***** ERROR: stranded merge requested, but input file ";
				_errorMsg  += getInputFileName(i);
				_errorMsg  += " does not have strands. *****";
				return false;
			}
			//make sure file is not VCF.
			if (getFile(1)->getFileType() == FileRecordTypeChecker::VCF_FILE_TYPE) {
				_errorMsg = "\n***** ERROR: stranded merge not supported for VCF file ";
				_errorMsg += getInputFileName(i);
				_errorMsg += ". *****";
				return false;
			}
		}
	}
    if (_genomeFile == NULL){
        _errorMsg = "\nERROR*****: specify -g genome file*****\n";
        return false;
    }
	//column operations not allowed with BAM input
	if (hasColumnOpsMethods() &&
			getFile(0)->getFileType() == FileRecordTypeChecker::BAM_FILE_TYPE) {
		_errorMsg = "\n***** ERROR: stranded merge not supported for VCF files. *****";
		return false;
	}
	return true;
}
Beispiel #12
0
// Returns the column-operation result string for the given key list, as
// produced by the column-operations helper. Tools without column operations
// get a reference to the shared _nullStr member instead.
const string &ContextBase::getColumnOpsVal(RecordKeyVector &keyList) const {
	if (hasColumnOpsMethods()) {
		return _keyListOps->getOpVals(keyList);
	}
	return _nullStr;
}
Beispiel #13
0
// Default constructor.
// Puts every option member named in the initializer list into its default
// ("not set") state and fills _programNames with the sub-command names this
// context recognizes.
ContextBase::ContextBase()
:
  _program(UNSPECIFIED_PROGRAM),
  // Value-initialize to an empty container. The previous "_files(NULL)" only
  // compiled while NULL was an integer constant (it selected the size
  // constructor with a count of 0) and read like a pointer initialization.
  _files(),
  _allFilesOpened(false),
  _genomeFile(NULL),
  _outputFileType(FileRecordTypeChecker::UNKNOWN_FILE_TYPE),
  _outputTypeDetermined(false),
  _skipFirstArgs(0),
  _showHelp(false),
  _obeySplits(false),
  _uncompressedBam(false),
  _useBufferedOutput(true),
  _ioBufSize(0),
  _anyHit(false),
  _noHit(false),
  _writeA(false),
  _writeB(false),
  _leftJoin(false),
  _writeCount(false),
  _writeOverlap(false),
  _writeAllOverlap(false),
  _haveFractionA(false),
  _haveFractionB(false),
  _overlapFractionA(0.0),
  _overlapFractionB(0.0),
  _reciprocalFraction(false),
  _eitherFraction(false),
  _sameStrand(false),
  _diffStrand(false),
  _sortedInput(false),
  _sortOutput(false),
  _reportDBnameTags(false),
  _reportDBfileNames(false),
  _printHeader(false),
  _printable(true),
  _explicitBedOutput(false),
  // file indexes start at -1, i.e. "no file assigned yet"
  _queryFileIdx(-1),
  _bamHeaderAndRefIdx(-1),
  _maxNumDatabaseFields(0),
  _useFullBamTags(false),
  _numOutputRecords(0),
  _hasConstantSeed(false),
  _seed(0),
  _forwardOnly(false),
  _reverseOnly(false),
  _nameCheckDisabled(false),
  _hasColumnOpsMethods(false),
  _keyListOps(NULL),
  _desiredStrand(FileRecordMergeMgr::ANY_STRAND),
  _maxDistance(0),
  _useMergedIntervals(false),
  // -1 means "no precision requested by the user"
  _reportPrecision(-1),
  _splitBlockInfo(NULL),
  _allFilesHaveChrInChromNames(UNTESTED),
  _allFileHaveLeadingZeroInChromNames(UNTESTED),
  _noEnforceCoordSort(false),
  _inheader(false),
  _nameConventionWarningTripped(false)

{
	// Associate each supported sub-command name with its program identifier.
	// ("sample" used to be registered twice; one assignment is enough.)
	_programNames["intersect"] = INTERSECT;
	_programNames["sample"] = SAMPLE;
	_programNames["map"] = MAP;
	_programNames["merge"] = MERGE;
	_programNames["closest"] = CLOSEST;
	_programNames["subtract"] = SUBTRACT;
	_programNames["jaccard"] = JACCARD;
	_programNames["spacing"] = SPACING;
	_programNames["fisher"] = FISHER;
	_programNames["coverage"] = COVERAGE;
	_programNames["complement"] = COMPLEMENT;
	_programNames["groupby"] = GROUP_BY;

	// NOTE(review): _hasColumnOpsMethods was initialized to false just above.
	// If hasColumnOpsMethods() merely reports that flag, this allocation can
	// never happen from this constructor - confirm against the accessor.
	if (hasColumnOpsMethods()) {
		_keyListOps = new KeyListOps();
	}
}