/**
 * Advance to the next query record and collect its hits.
 *
 * @param retList  Cleared on entry, keyed to the newly read query record,
 *                 then handed to masterScan() to be filled.
 * @return false if nextRecord(true) fails (query EOF), or if all current
 *         database records are null, all caches are empty and
 *         _runToQueryEnd is not set; true otherwise.
 */
bool NewChromSweep::next(RecordKeyVector &retList) {
    retList.clearVector();

    // When no query record is held yet, the record about to be read is the
    // first one, so it has to be tested for chrom sort order. Otherwise the
    // previous record is released before reading the next.
    const bool firstQueryRead = (_currQueryRec == NULL);
    if (!firstQueryRead) {
        _queryFRM->deleteRecord(_currQueryRec);
    }

    if (!nextRecord(true)) {
        return false; // query EOF hit
    }

    retList.setKey(_currQueryRec);
    if (firstQueryRead) {
        testChromOrder(_currQueryRec);
    }

    // Nothing left on the database side: stop early unless the sweep was
    // asked to run to the end of the query file.
    const bool nothingLeftInDbs = allCurrDBrecsNull() && allCachesEmpty();
    if (nothingLeftInDbs && !_runToQueryEnd) {
        _testLastQueryRec = true;
        return false;
    }

    _currQueryChromName = _currQueryRec->getChrName();
    masterScan(retList);

    if (_context->getSortOutput()) {
        retList.sortVector();
    }

    // Remember this chrom so the next call can detect a chrom change.
    _prevQueryChromName = _currQueryChromName;
    return true;
}
bool NewChromSweep::init() { //Create new FileRecordMgrs for the input files. //Open them, and get the first record from each. //otherwise, return true. _queryFRM = _context->getFile(_context->getQueryFileIdx()); _dbFRMs.resize(_numDBs, NULL); for (int i=0; i < _numDBs; i++) { _dbFRMs[i] = _context->getDatabaseFile(i); } _currDbRecs.resize(_numDBs, NULL); if (!_context->hasGenomeFile()) { _fileTracks.resize(_numFiles, NULL); for (int i=0; i < _numFiles; i++) { _fileTracks[i] = new _orderTrackType; } } for (int i=0; i < _numDBs; i++) { nextRecord(false, i); testChromOrder(_currDbRecs[i]); } _caches.resize(_numDBs); _wasInitialized = true; return true; }
void NewChromSweep::closeOut(bool testChromOrderVal) { if (_testLastQueryRec) { testChromOrder(_currQueryRec); } while (!_queryFRM->eof()) { nextRecord(true); testChromOrder(_currQueryRec); } if (testChromOrderVal) testChromOrder(_currQueryRec); for (int i=0; i < _numDBs; i++) { while (!_dbFRMs[i]->eof()) { if (testChromOrderVal) testChromOrder(_currDbRecs[i]); nextRecord(false, i); } if (testChromOrderVal) testChromOrder(_currDbRecs[i]); } }
bool NewChromSweep::chromChange(int dbIdx, RecordKeyVector &retList, bool wantScan) { const Record *dbRec = _currDbRecs[dbIdx]; if (_currQueryRec != NULL && _currQueryChromName != _prevQueryChromName) { _context->testNameConventions(_currQueryRec); testChromOrder(_currQueryRec); } if (dbRec != NULL) { _context->testNameConventions(dbRec); testChromOrder(dbRec); } // If the query rec and db rec are on the same chrom, stop. if (dbRec != NULL && _currQueryRec != NULL && _currQueryRec->sameChrom(dbRec)) return false; if (dbRec == NULL || _currQueryRec == NULL) return false; if (queryChromAfterDbRec(dbRec)) { // the query is ahead of the database. fast-forward the database to catch-up. QuickString oldDbChrom(dbRec->getChrName()); while (dbRec != NULL && queryChromAfterDbRec(dbRec)) { _dbFRMs[dbIdx]->deleteRecord(dbRec); if (!nextRecord(false, dbIdx)) break; dbRec = _currDbRecs[dbIdx]; const QuickString &newDbChrom = dbRec->getChrName(); if (newDbChrom != oldDbChrom) { testChromOrder(dbRec); oldDbChrom = newDbChrom; } } clearCache(dbIdx); return false; } else { // the database is ahead of the query. // scan the cache for remaining hits on the query's current chrom. if (wantScan) scanCache(dbIdx, retList); return true; } }
/**
 * Scan every database for the current query record.
 *
 * For each database the per-database selection lists are cleared, the cache
 * is scanned, and the database is advanced while its current record stays
 * on the query's chrom and considerRecord() has not asked to stop.
 *
 * @param retList  Passed through to chromChange(), scanCache(),
 *                 finalizeSelections() and checkMultiDbs().
 */
void CloseSweep::masterScan(RecordKeyVector &retList) {
    _qForward = _currQueryRec->getStrandVal() == Record::FORWARD;
    _qReverse = _currQueryRec->getStrandVal() == Record::REVERSE;

    if (_currQueryChromName != _prevQueryChromName) {
        testChromOrder(_currQueryRec);
    }
    if (_context->reportDistance()) {
        _finalDistances.clear();
    }

    for (int dbIdx = 0; dbIdx < _numDBs; ++dbIdx) {
        // First clear out everything from the previous scan.
        _minUpstreamRecs[dbIdx]->clear();
        _minDownstreamRecs[dbIdx]->clear();
        _overlapRecs[dbIdx]->clear();

        // Skip this database if it is finished or chromChange() reports true;
        // finalizeSelections() is not run here in that case.
        if (dbFinished(dbIdx) || chromChange(dbIdx, retList, true)) {
            continue;
        }

        // Scan the database cache for hits.
        scanCache(dbIdx, retList);

        // Advance the db until we are ahead of the query, updating hits and
        // cache as necessary.
        bool stopScanning = false;
        while (_currDbRecs[dbIdx] != NULL
               && _currQueryRec->sameChrom(_currDbRecs[dbIdx])
               && !stopScanning) {
            const bool discard =
                (considerRecord(_currDbRecs[dbIdx], dbIdx, stopScanning) == DELETE);
            if (discard) {
                _dbFRMs[dbIdx]->deleteRecord(_currDbRecs[dbIdx]);
            } else {
                _caches[dbIdx].push_back(_currDbRecs[dbIdx]);
            }
            // Either way the slot is emptied before the next record is read.
            _currDbRecs[dbIdx] = NULL;
            nextRecord(false, dbIdx);
        }

        finalizeSelections(dbIdx, retList);
    }

    checkMultiDbs(retList);
}
bool NewChromSweep::init() { //Create new FileRecordMgrs for the input files. //Open them, and get the first record from each. //otherwise, return true. _queryFRM = _context->getFile(_context->getQueryFileIdx()); _dbFRMs.resize(_numDBs, NULL); for (int i=0; i < _numDBs; i++) { _dbFRMs[i] = _context->getDatabaseFile(i); } _currDbRecs.resize(_numDBs, NULL); if (!_context->hasGenomeFile()) { _fileTracks.resize(_numFiles, NULL); for (int i=0; i < _numFiles; i++) { _fileTracks[i] = new _orderTrackType; } } for (int i=0; i < _numDBs; i++) { nextRecord(false, i); testChromOrder(_currDbRecs[i]); } _caches.resize(_numDBs); //determine whether to stop when the database end is hit, or keep going until the //end of the query file is hit as well. if (_context->getNoHit() || _context->getWriteCount() || _context->getWriteOverlap() || _context->getWriteAllOverlap() || _context->getLeftJoin()) { _runToQueryEnd = true; } _wasInitialized = true; return true; }
bool CloseSweep::chromChange(int dbIdx, RecordKeyVector &retList, bool wantScan) { Record *dbRec = _currDbRecs[dbIdx]; bool haveQuery = _currQueryRec != NULL; bool haveDB = dbRec != NULL; if (haveQuery && _currQueryChromName != _prevQueryChromName) { _context->testNameConventions(_currQueryRec); testChromOrder(_currQueryRec); } if (haveDB) { _context->testNameConventions(dbRec); testChromOrder(dbRec); } // the files are on the same chrom if (haveQuery && (!haveDB || _currQueryRec->sameChrom(dbRec))) { //if this is the first time the query's chrom is ahead of the chrom that was in this cache, //then we have to clear the cache. if (!_caches[dbIdx].empty() && queryChromAfterDbRec(_caches[dbIdx].begin()->value())) { clearCache(dbIdx); clearClosestEndPos(dbIdx); } return false; } if (!haveQuery || !haveDB) return false; if (!_caches[dbIdx].empty() && (_caches[dbIdx].begin()->value()->sameChrom(_currQueryRec))) { //the newest DB record's chrom is ahead of the query, but the cache still //has old records on that query's chrom scanCache(dbIdx, retList); finalizeSelections(dbIdx, retList); return true; } // the query is ahead of the database. fast-forward the database to catch-up. if (queryChromAfterDbRec(dbRec)) { string oldDbChrom(dbRec->getChrName()); while (dbRec != NULL && queryChromAfterDbRec(dbRec)) { _dbFRMs[dbIdx]->deleteRecord(dbRec); if (!nextRecord(false, dbIdx)) break; dbRec = _currDbRecs[dbIdx]; const string &newDbChrom = dbRec->getChrName(); if (newDbChrom != oldDbChrom) { testChromOrder(dbRec); oldDbChrom = newDbChrom; } } clearCache(dbIdx); clearClosestEndPos(dbIdx); return false; } // the database is ahead of the query. else { // 1. scan the cache for remaining hits on the query's current chrom. if (wantScan) scanCache(dbIdx, retList); return true; } //control can't reach here, but compiler still wants a return statement. return true; }