bool MergeFile::merge() { RecordKeyVector hitSet; FileRecordMgr *frm = _context->getFile(0); while (!frm->eof()) { Record *key = frm->getNextRecord(&hitSet); if (key == NULL) continue; _recordOutputMgr->printRecord(hitSet.getKey(), _context->getColumnOpsVal(hitSet)); } return true; }
// Factory for the per-file record manager.
//
// filename - name of the file the new manager will read.
// fileIdx  - index stored on the manager via setFileIdx().
//
// Returns a heap-allocated manager. When _useMergedIntervals is set, a
// FileRecordMergeMgr is created and additionally given the desired strand
// type and maximum distance; otherwise a plain FileRecordMgr is created.
// Nothing in this function releases the returned object.
FileRecordMgr *ContextBase::getNewFRM(const string &filename, int fileIdx)
{
    if (!_useMergedIntervals) {
        FileRecordMgr *plainMgr = new FileRecordMgr(filename);
        plainMgr->setFileIdx(fileIdx);
        return plainMgr;
    }

    FileRecordMergeMgr *mergeMgr = new FileRecordMergeMgr(filename);
    mergeMgr->setStrandType(_desiredStrand);
    mergeMgr->setMaxDistance(_maxDistance);
    mergeMgr->setFileIdx(fileIdx);
    return mergeMgr;
}
bool ContextBase::isValidState() { if (!openFiles()) { return false; } if (!cmdArgsValid()) { return false; } if (!determineOutputType()) { return false; } if (_program != GROUP_BY && _files[0]->getRecordType() == FileRecordTypeChecker::NO_POS_PLUS_RECORD_TYPE) { _errorMsg = "ERROR: file "; _errorMsg.append(_files[0]->getFileName()); _errorMsg.append(" has non positional records, which are only valid for \n"); _errorMsg.append(" the groupBy tool. Perhaps you are using a header"); _errorMsg.append(" line(s) that starts with \n"); _errorMsg.append(" something other than \"#\", \"chrom\", or \"chr\" (any case)?"); return false; } if (getObeySplits()) { _splitBlockInfo = new BlockMgr(_overlapFractionA, _reciprocalFraction); } if (hasColumnOpsMethods()) { if (hasIntersectMethods()) { for (int i=0; i < (int)_dbFileIdxs.size(); i++) { FileRecordMgr *dbFile = getFile(_dbFileIdxs[i]); _keyListOps->setDBfileType(dbFile->getFileType()); if (!_keyListOps->isValidColumnOps(dbFile)) { return false; } } } else { FileRecordMgr *dbFile = getFile(0); _keyListOps->setDBfileType(dbFile->getFileType()); if (!_keyListOps->isValidColumnOps(dbFile)) { return false; } } //if user specified a precision, pass it to //keyList ops if (_reportPrecision != -1) { _keyListOps->setPrecision(_reportPrecision); } } return true; }
// Builds the header used when the input supplies none: one "col_<n>" label
// per field of the first input file, tab-separated and terminated by a
// newline (e.g. "col_1\tcol_2\tcol_3\n").
//
// Returns a reference to the _defaultHeader member, which is rebuilt on
// every call. If the file reports zero fields the header is left empty;
// the previous implementation overwrote the "last" character of an empty
// string in that case, which is an out-of-range write.
const string &ContextGroupBy::getDefaultHeader()
{
    //groupBy does not support multiple databases.
    FileRecordMgr *frm = _files[0];
    int numFields = frm->getNumFields();

    _defaultHeader.clear();
    ostringstream s;
    for (int i = 1; i <= numFields; i++) {
        // Tabs go between labels only, so no trailing tab has to be
        // patched into a newline afterwards.
        if (i > 1) {
            s << "\t";
        }
        s << "col_";
        s << i;
    }
    // Terminate the line only when at least one column was written.
    if (numFields > 0) {
        s << "\n";
    }
    _defaultHeader.append(s.str());
    return _defaultHeader;
}
bool ContextBase::isValidState() { if (!openFiles()) { return false; } if (!cmdArgsValid()) { return false; } if (!determineOutputType()) { return false; } if (hasColumnOpsMethods()) { FileRecordMgr *dbFile = getFile(hasIntersectMethods() ? _databaseFileIdx : 0); _keyListOps->setDBfileType(dbFile->getFileType()); if (!_keyListOps->isValidColumnOps(dbFile)) { return false; } } return true; }
bool ContextBase::isValidState() { if (!openFiles()) { return false; } if (!cmdArgsValid()) { return false; } if (!determineOutputType()) { return false; } if (getObeySplits()) { _splitBlockInfo = new BlockMgr(_overlapFraction, _reciprocal); } if (hasColumnOpsMethods()) { if (hasIntersectMethods()) { for (int i=0; i < (int)_dbFileIdxs.size(); i++) { FileRecordMgr *dbFile = getFile(_dbFileIdxs[i]); _keyListOps->setDBfileType(dbFile->getFileType()); if (!_keyListOps->isValidColumnOps(dbFile)) { return false; } } } else { FileRecordMgr *dbFile = getFile(0); _keyListOps->setDBfileType(dbFile->getFileType()); if (!_keyListOps->isValidColumnOps(dbFile)) { return false; } } //if user specified a precision, pass it to //keyList ops if (_reportPrecision != -1) { _keyListOps->setPrecision(_reportPrecision); } } return true; }
bool ContextBase::openFiles() { //Make a vector of FileRecordMgr objects by going through the vector //of filenames and opening each one. if (_allFilesOpened) { return true; } _files.resize(_fileNames.size()); for (int i = 0; i < (int)_fileNames.size(); i++) { FileRecordMgr *frm = new FileRecordMgr(_fileNames[i], _sortedInput); if (hasGenomeFile()) { frm->setGenomeFile(_genomeFile); } frm->setFullBamFlags(_useFullBamTags); if (!frm->open()) { return false; } _files[i] = frm; } _allFilesOpened = true; return true; }
bool ContextBase::openFiles() { //Make a vector of FileRecordMgr objects by going through the vector //of filenames and opening each one. if (_allFilesOpened) { return true; } if (_fileNames.size() == 0) { //No input was specified. Error and exit. _errorMsg += "\n***** ERROR: No input file given. Exiting. *****"; return false; } _files.resize(_fileNames.size()); for (int i = 0; i < (int)_fileNames.size(); i++) { FileRecordMgr *frm = getNewFRM(_fileNames[i], i); if (hasGenomeFile()) { frm->setGenomeFile(_genomeFile); } //If we're going to do column operations, and an input file // is BAM, we'll need the full flags. if (hasColumnOpsMethods()) { setUseFullBamTags(true); } frm->setFullBamFlags(_useFullBamTags); frm->setIsSorted(_sortedInput); frm->setIoBufSize(_ioBufSize); frm->setIsGroupBy(_program == GROUP_BY); if (!frm->open(_inheader)) { return false; } if (_noEnforceCoordSort) { frm->setNoEnforceCoodSort(true); } _files[i] = frm; } _allFilesOpened = true; return true; }
bool FileIntersect::processUnsortedFiles() { const QuickString &databaseFilename = _context->getDatabaseFileName(); BinTree *binTree = new BinTree(_context->getDatabaseFileIdx(), _context); FileRecordMgr *queryFRM = new FileRecordMgr(_context->getQueryFileIdx(), _context); if (!queryFRM->open()) { return false; } if (!binTree->loadDB()) { fprintf(stderr, "Error: Unable to load database file %s.\n", databaseFilename.c_str()); delete binTree; exit(1); } _context->determineOutputType(); _recordOutputMgr->init(_context); while (!queryFRM->eof()) { Record *queryRecord = queryFRM->allocateAndGetNextRecord(); if (queryRecord == NULL) { continue; } RecordKeyList hitSet(queryRecord); binTree->getHits(queryRecord, hitSet); if (_context->getObeySplits()) { RecordKeyList keySet(hitSet.getKey()); RecordKeyList resultSet; _blockMgr->findBlockedOverlaps(keySet, hitSet, resultSet); processHits(resultSet); } else { processHits(hitSet); } queryFRM->deleteRecord(queryRecord); } queryFRM->close(); //clean up. delete queryFRM; delete binTree; return true; }