Esempio n. 1
0
bool MergeFile::merge()
{
    RecordKeyVector hitSet;
    FileRecordMgr *frm = _context->getFile(0);
    while (!frm->eof()) {
    	Record *key = frm->getNextRecord(&hitSet);
    	if (key == NULL) continue;
		_recordOutputMgr->printRecord(hitSet.getKey(), _context->getColumnOpsVal(hitSet));
    }
    return true;
}
Esempio n. 2
0
// Factory for the record manager used to read one input file.
//
// filename - passed to the manager's constructor.
// fileIdx  - stored on the manager via setFileIdx().
//
// When _useMergedIntervals is set, a FileRecordMergeMgr is created and also
// given the desired strand type and maximum distance; otherwise a plain
// FileRecordMgr is created. The object is allocated with new and handed back
// as a raw pointer, so the caller is responsible for it.
FileRecordMgr *ContextBase::getNewFRM(const string &filename, int fileIdx) {
	if (!_useMergedIntervals) {
		FileRecordMgr *plainMgr = new FileRecordMgr(filename);
		plainMgr->setFileIdx(fileIdx);
		return plainMgr;
	}

	FileRecordMergeMgr *mergingMgr = new FileRecordMergeMgr(filename);
	mergingMgr->setStrandType(_desiredStrand);
	mergingMgr->setMaxDistance(_maxDistance);
	mergingMgr->setFileIdx(fileIdx);
	return mergingMgr;
}
Esempio n. 3
0
bool ContextBase::isValidState()
{
	if (!openFiles()) {
		return false;
	}
	if (!cmdArgsValid()) {
		return false;
	}
	if (!determineOutputType()) {
		return false;
	}
	if (_program != GROUP_BY && 
		_files[0]->getRecordType() == FileRecordTypeChecker::NO_POS_PLUS_RECORD_TYPE) 
	{
		_errorMsg = "ERROR: file ";
		_errorMsg.append(_files[0]->getFileName());
		_errorMsg.append(" has non positional records, which are only valid for \n");
		_errorMsg.append(" the groupBy tool. Perhaps you are using a header");
		_errorMsg.append(" line(s) that starts with \n");
		_errorMsg.append(" something other than \"#\", \"chrom\", or \"chr\" (any case)?");
		return false;
	}
	if (getObeySplits()) {
		_splitBlockInfo = new BlockMgr(_overlapFractionA, _reciprocalFraction);
	}
	if (hasColumnOpsMethods()) {

		if (hasIntersectMethods()) {
			for (int i=0; i < (int)_dbFileIdxs.size(); i++) {
				FileRecordMgr *dbFile = getFile(_dbFileIdxs[i]);
				_keyListOps->setDBfileType(dbFile->getFileType());
				if (!_keyListOps->isValidColumnOps(dbFile)) {
					return false;
				}
			}
		} else {
			FileRecordMgr *dbFile = getFile(0);
			_keyListOps->setDBfileType(dbFile->getFileType());
			if (!_keyListOps->isValidColumnOps(dbFile)) {
				return false;
			}
		}
		//if user specified a precision, pass it to
		//keyList ops
		if (_reportPrecision != -1) {
			_keyListOps->setPrecision(_reportPrecision);
		}
	}
	return true;
}
Esempio n. 4
0
// Rebuilds and returns the default header line for the first input file:
// "col_1\tcol_2\t...\tcol_N\n", where N is that file's field count.
//
// The result is stored in _defaultHeader and returned by reference, so it is
// overwritten by the next call.
//
// If the file reports no fields (N <= 0) the header is left empty. The
// previous implementation unconditionally overwrote the last character of
// the string, which indexed out of bounds on an empty header.
const string &ContextGroupBy::getDefaultHeader() {
	//groupBy does not support multiple databases.
	FileRecordMgr *frm = _files[0];
	int numFields = frm->getNumFields();
	_defaultHeader.clear();
	ostringstream s;
	for (int i=1; i <= numFields; i++) {
		// Tabs go *between* columns, so no trailing tab has to be patched
		// into a newline afterwards.
		if (i > 1) {
			s << "\t";
		}
		s << "col_";
		s << i;
	}
	// Only terminate a header that actually has columns.
	if (numFields > 0) {
		s << "\n";
	}
	_defaultHeader.append(s.str());
	return _defaultHeader;
}
Esempio n. 5
0
bool ContextBase::isValidState()
{
	if (!openFiles()) {
		return false;
	}
	if (!cmdArgsValid()) {
		return false;
	}
	if (!determineOutputType()) {
		return false;
	}
	if (hasColumnOpsMethods()) {
		FileRecordMgr *dbFile = getFile(hasIntersectMethods() ? _databaseFileIdx : 0);
		_keyListOps->setDBfileType(dbFile->getFileType());
		if (!_keyListOps->isValidColumnOps(dbFile)) {
			return false;
		}
	}
	return true;
}
Esempio n. 6
0
bool ContextBase::isValidState()
{
	if (!openFiles()) {
		return false;
	}
	if (!cmdArgsValid()) {
		return false;
	}
	if (!determineOutputType()) {
		return false;
	}
	if (getObeySplits()) {
		_splitBlockInfo = new BlockMgr(_overlapFraction, _reciprocal);
	}
	if (hasColumnOpsMethods()) {

		if (hasIntersectMethods()) {
			for (int i=0; i < (int)_dbFileIdxs.size(); i++) {
				FileRecordMgr *dbFile = getFile(_dbFileIdxs[i]);
				_keyListOps->setDBfileType(dbFile->getFileType());
				if (!_keyListOps->isValidColumnOps(dbFile)) {
					return false;
				}
			}
		} else {
			FileRecordMgr *dbFile = getFile(0);
			_keyListOps->setDBfileType(dbFile->getFileType());
			if (!_keyListOps->isValidColumnOps(dbFile)) {
				return false;
			}
		}
		//if user specified a precision, pass it to
		//keyList ops
		if (_reportPrecision != -1) {
			_keyListOps->setPrecision(_reportPrecision);
		}
	}
	return true;
}
Esempio n. 7
0
bool ContextBase::openFiles() {

	//Make a vector of FileRecordMgr objects by going through the vector
	//of filenames and opening each one.
	if (_allFilesOpened) {
		return true;
	}
	_files.resize(_fileNames.size());

	for (int i = 0; i < (int)_fileNames.size(); i++) {
		FileRecordMgr *frm = new FileRecordMgr(_fileNames[i], _sortedInput);
		if (hasGenomeFile()) {
			frm->setGenomeFile(_genomeFile);
		}
		frm->setFullBamFlags(_useFullBamTags);
		if (!frm->open()) {
			return false;
		}
		_files[i] = frm;
	}
	_allFilesOpened = true;
	return true;
}
Esempio n. 8
0
bool ContextBase::openFiles() {

	//Make a vector of FileRecordMgr objects by going through the vector
	//of filenames and opening each one.
	if (_allFilesOpened) {
		return true;
	}

	if (_fileNames.size() == 0) {
		//No input was specified. Error and exit.
		_errorMsg += "\n***** ERROR: No input file given. Exiting. *****";
		return false;
	}

	_files.resize(_fileNames.size());

	for (int i = 0; i < (int)_fileNames.size(); i++) {
		FileRecordMgr *frm = getNewFRM(_fileNames[i], i);
		if (hasGenomeFile()) {
			frm->setGenomeFile(_genomeFile);
		}
		//If we're going to do column operations, and an input file
		// is BAM, we'll need the full flags.
		if (hasColumnOpsMethods()) {
			setUseFullBamTags(true);
		}
		frm->setFullBamFlags(_useFullBamTags);
		frm->setIsSorted(_sortedInput);
		frm->setIoBufSize(_ioBufSize);
		frm->setIsGroupBy(_program == GROUP_BY);
		if (!frm->open(_inheader)) {
			return false;
		}
		if (_noEnforceCoordSort) {
			frm->setNoEnforceCoodSort(true);
		}
		_files[i] = frm;
	}
	_allFilesOpened = true;
	return true;
}
Esempio n. 9
0
// Intersects the query file against the database file without requiring
// sorted input: the database is loaded into a BinTree, then each query record
// is looked up in the tree and its hits are passed to processHits().
//
// Returns false if the query file cannot be opened, true after the whole
// query file has been processed. If the database cannot be loaded, an error
// is printed to stderr and the process exits with status 1.
bool FileIntersect::processUnsortedFiles()
{
	const QuickString &databaseFilename = _context->getDatabaseFileName();
	BinTree *binTree = new BinTree(_context->getDatabaseFileIdx(), _context);

	FileRecordMgr *queryFRM = new FileRecordMgr(_context->getQueryFileIdx(), _context);
	if (!queryFRM->open()) {
		// Both objects are owned by this function; release them on the
		// failure path too instead of leaking them.
		delete queryFRM;
		delete binTree;
		return false;
	}

	if (!binTree->loadDB()) {
		fprintf(stderr, "Error: Unable to load database file %s.\n", databaseFilename.c_str());
		delete binTree;
		exit(1);
	}


	_context->determineOutputType();
	_recordOutputMgr->init(_context);

	while (!queryFRM->eof()) {
		Record *queryRecord = queryFRM->allocateAndGetNextRecord();
		if (queryRecord == NULL) {
			// Nothing was returned for this read; poll eof() again.
			continue;
		}
		RecordKeyList hitSet(queryRecord);
		binTree->getHits(queryRecord, hitSet);
		if (_context->getObeySplits()) {
			// With split handling on, the raw hits are first passed through
			// the block manager and only its result set is processed.
			RecordKeyList keySet(hitSet.getKey());
			RecordKeyList resultSet;
			_blockMgr->findBlockedOverlaps(keySet, hitSet, resultSet);
			processHits(resultSet);
		} else {
			processHits(hitSet);
		}
		// The record came from allocateAndGetNextRecord(), so hand it back
		// to the manager once its hits have been processed.
		queryFRM->deleteRecord(queryRecord);
	}
	queryFRM->close();

	//clean up.
	delete queryFRM;
	delete binTree;
	return true;
}