// Advances the sweep to the next query record and gathers its hits into retList.
//
// retList is cleared, keyed on the newly read query record, filled by
// masterScan(), and sorted when the context asks for sorted output.
//
// Returns false when nextRecord(true) fails (query EOF), or when every current
// database record is NULL, every cache is empty and _runToQueryEnd is not set.
// Returns true otherwise.
bool NewChromSweep::next(RecordKeyVector &retList) {
	retList.clearVector();

	// No query record held yet means the record read below is the first one,
	// so it has to be tested for chrom sort order here.
	const bool isFirstQueryRead = (_currQueryRec == NULL);
	if (!isFirstQueryRead) {
		// give the previous query record back to its manager before advancing.
		_queryFRM->deleteRecord(_currQueryRec);
	}

	if (!nextRecord(true)) {
		return false; // query EOF hit
	}
	retList.setKey(_currQueryRec);

	if (isFirstQueryRead) {
		testChromOrder(_currQueryRec);
	}

	// Nothing remains on the database side and the sweep was not asked to run
	// to the end of the query: stop, and set _testLastQueryRec so closeOut()
	// runs testChromOrder on this query record.
	const bool databasesDrained = allCurrDBrecsNull() && allCachesEmpty();
	if (databasesDrained && !_runToQueryEnd) {
		_testLastQueryRec = true;
		return false;
	}

	_currQueryChromName = _currQueryRec->getChrName();
	masterScan(retList);

	if (_context->getSortOutput()) {
		retList.sortVector();
	}

	// remember this chrom so the next call can detect a chrom change.
	_prevQueryChromName = _currQueryChromName;
	return true;
}
// Prepares the sweep for use: obtains the query and database file managers
// from the context, reads the first record of each database, and sizes the
// per-database caches. Always returns true.
bool NewChromSweep::init() {

    _queryFRM = _context->getFile(_context->getQueryFileIdx());

    // One manager slot per database, filled from the context.
    _dbFRMs.resize(_numDBs, NULL);
    int dbIdx = 0;
    while (dbIdx < _numDBs) {
        _dbFRMs[dbIdx] = _context->getDatabaseFile(dbIdx);
        ++dbIdx;
    }

    _currDbRecs.resize(_numDBs, NULL);

    // Order trackers are only allocated, one per file, when the context has
    // no genome file.
    if (!_context->hasGenomeFile()) {
        _fileTracks.resize(_numFiles, NULL);
        int fileIdx = 0;
        while (fileIdx < _numFiles) {
            _fileTracks[fileIdx] = new _orderTrackType;
            ++fileIdx;
        }
    }

    // Read the first record of every database and test its chrom order.
    // NOTE(review): the result of nextRecord() is not checked here, so
    // testChromOrder() presumably tolerates a NULL record -- confirm.
    for (dbIdx = 0; dbIdx < _numDBs; ++dbIdx) {
        nextRecord(false, dbIdx);
        testChromOrder(_currDbRecs[dbIdx]);
    }

    _caches.resize(_numDBs);
    _wasInitialized = true;
    return true;
}
// Reads through whatever remains of the query file and of each database file,
// running testChromOrder() on records along the way.
//
// testChromOrderVal - when true, the query record left after draining and
//                     every database record encountered are tested as well.
//                     Query records read while draining are tested regardless.
//
// NOTE(review): records are not handed to deleteRecord() while draining here,
// unlike in next() -- confirm whether that is intentional.
void NewChromSweep::closeOut(bool testChromOrderVal) {
	// set by next() when it stopped early on the current query record.
	if (_testLastQueryRec) testChromOrder(_currQueryRec);

	// drain the query file, testing each record as it is read.
	for (; !_queryFRM->eof(); ) {
		nextRecord(true);
		testChromOrder(_currQueryRec);
	}
	if (testChromOrderVal) testChromOrder(_currQueryRec);

	// drain each database file in turn.
	for (int dbIdx = 0; dbIdx < _numDBs; ++dbIdx) {
		for (;;) {
			const bool drained = _dbFRMs[dbIdx]->eof();
			// the current record is tested before each advance, and once
			// more after eof has been reached.
			if (testChromOrderVal) testChromOrder(_currDbRecs[dbIdx]);
			if (drained) break;
			nextRecord(false, dbIdx);
		}
	}
}
bool NewChromSweep::chromChange(int dbIdx, RecordKeyVector &retList, bool wantScan)
{
	const Record *dbRec = _currDbRecs[dbIdx];

	if (_currQueryRec != NULL && _currQueryChromName != _prevQueryChromName) {
		_context->testNameConventions(_currQueryRec);
		testChromOrder(_currQueryRec);
	}

	if (dbRec != NULL) {
		_context->testNameConventions(dbRec);
		testChromOrder(dbRec);
	}

	// If the query rec and db rec are on the same chrom, stop.
	if (dbRec != NULL && _currQueryRec != NULL && _currQueryRec->sameChrom(dbRec)) return false;


	if (dbRec == NULL || _currQueryRec == NULL) return false;

	if (queryChromAfterDbRec(dbRec)) {
		// the query is ahead of the database. fast-forward the database to catch-up.
		QuickString oldDbChrom(dbRec->getChrName());
		while (dbRec != NULL &&
				queryChromAfterDbRec(dbRec)) {
				_dbFRMs[dbIdx]->deleteRecord(dbRec);
			if (!nextRecord(false, dbIdx)) break;
			dbRec =  _currDbRecs[dbIdx];
			const QuickString &newDbChrom = dbRec->getChrName();
			if (newDbChrom != oldDbChrom) {
				testChromOrder(dbRec);
				oldDbChrom = newDbChrom;
			}
		}
		clearCache(dbIdx);
        return false;
    } else {
        // the database is ahead of the query.
        // scan the cache for remaining hits on the query's current chrom.
    	if (wantScan) scanCache(dbIdx, retList);
        return true;
    }
}
Example #5
0
// Scans every database for records relevant to the current query record.
//
// For each database the per-database selections are reset first. A database
// that is finished, or for which chromChange() returns true, is skipped.
// Otherwise the cache is scanned, the database is advanced while its current
// record stays on the query's chrom, and finalizeSelections() is called.
// checkMultiDbs() runs once after all databases.
//
// retList - passed through to the scanning and finalizing helpers.
void CloseSweep::masterScan(RecordKeyVector &retList) {

    // Note which strand the query is on.
    const bool queryOnForward = (_currQueryRec->getStrandVal() == Record::FORWARD);
    const bool queryOnReverse = (_currQueryRec->getStrandVal() == Record::REVERSE);
    _qForward = queryOnForward;
    _qReverse = queryOnReverse;

    if (_currQueryChromName != _prevQueryChromName) {
        testChromOrder(_currQueryRec);
    }
    if (_context->reportDistance()) {
        _finalDistances.clear();
    }

    for (int dbIdx = 0; dbIdx < _numDBs; ++dbIdx) {

        //first clear out everything from the previous scan
        _minUpstreamRecs[dbIdx]->clear();
        _minDownstreamRecs[dbIdx]->clear();
        _overlapRecs[dbIdx]->clear();

        // Skipping here also bypasses finalizeSelections() for this database.
        if (dbFinished(dbIdx) || chromChange(dbIdx, retList, true)) {
            continue;
        }

        // scan the database cache for hits
        scanCache(dbIdx, retList);

        // advance the db until we are ahead of the query, or the db ends.
        // update hits and cache as necessary
        bool stopScanning = false;
        while (_currDbRecs[dbIdx] != NULL &&
                _currQueryRec->sameChrom(_currDbRecs[dbIdx]) &&
                !stopScanning) {
            const bool discard =
                (considerRecord(_currDbRecs[dbIdx], dbIdx, stopScanning) == DELETE);
            if (discard) {
                _dbFRMs[dbIdx]->deleteRecord(_currDbRecs[dbIdx]);
            } else {
                // records that are not deleted are kept in the cache.
                _caches[dbIdx].push_back(_currDbRecs[dbIdx]);
            }
            // either way the slot is emptied before the next record is read.
            _currDbRecs[dbIdx] = NULL;
            nextRecord(false, dbIdx);
        }

        finalizeSelections(dbIdx, retList);
    }
    checkMultiDbs(retList);
}
// Prepares the sweep for use: obtains the query and database file managers
// from the context, reads the first record of each database, sizes the
// per-database caches, and decides whether the sweep must run to the end of
// the query file. Always returns true.
bool NewChromSweep::init() {

    _queryFRM = _context->getFile(_context->getQueryFileIdx());

    // One manager slot per database, filled from the context.
    _dbFRMs.resize(_numDBs, NULL);
    int dbIdx = 0;
    while (dbIdx < _numDBs) {
        _dbFRMs[dbIdx] = _context->getDatabaseFile(dbIdx);
        ++dbIdx;
    }

    _currDbRecs.resize(_numDBs, NULL);

    // Order trackers are only allocated, one per file, when the context has
    // no genome file.
    if (!_context->hasGenomeFile()) {
        _fileTracks.resize(_numFiles, NULL);
        int fileIdx = 0;
        while (fileIdx < _numFiles) {
            _fileTracks[fileIdx] = new _orderTrackType;
            ++fileIdx;
        }
    }

    // Read the first record of every database and test its chrom order.
    // NOTE(review): the result of nextRecord() is not checked here, so
    // testChromOrder() presumably tolerates a NULL record -- confirm.
    for (dbIdx = 0; dbIdx < _numDBs; ++dbIdx) {
        nextRecord(false, dbIdx);
        testChromOrder(_currDbRecs[dbIdx]);
    }

    _caches.resize(_numDBs);

    //determine whether to stop when the database end is hit, or keep going until the
    //end of the query file is hit as well.
    const bool mustReachQueryEnd = _context->getNoHit()
            || _context->getWriteCount()
            || _context->getWriteOverlap()
            || _context->getWriteAllOverlap()
            || _context->getLeftJoin();
    if (mustReachQueryEnd) {
        _runToQueryEnd = true;
    }

    _wasInitialized = true;
    return true;
}
Example #7
0
bool CloseSweep::chromChange(int dbIdx, RecordKeyVector &retList, bool wantScan)
{
    Record *dbRec = _currDbRecs[dbIdx];

    bool haveQuery = _currQueryRec != NULL;
    bool haveDB = dbRec != NULL;

    if (haveQuery && _currQueryChromName != _prevQueryChromName) {
        _context->testNameConventions(_currQueryRec);
        testChromOrder(_currQueryRec);
    }

    if (haveDB) {
        _context->testNameConventions(dbRec);
        testChromOrder(dbRec);
    }

    // the files are on the same chrom
    if (haveQuery && (!haveDB || _currQueryRec->sameChrom(dbRec))) {

        //if this is the first time the query's chrom is ahead of the chrom that was in this cache,
        //then we have to clear the cache.
        if (!_caches[dbIdx].empty() && queryChromAfterDbRec(_caches[dbIdx].begin()->value())) {
            clearCache(dbIdx);
            clearClosestEndPos(dbIdx);
        }
        return false;
    }

    if (!haveQuery || !haveDB) return false;

    if (!_caches[dbIdx].empty() && (_caches[dbIdx].begin()->value()->sameChrom(_currQueryRec))) {
        //the newest DB record's chrom is ahead of the query, but the cache still
        //has old records on that query's chrom
        scanCache(dbIdx, retList);
        finalizeSelections(dbIdx, retList);
        return true;
    }


    // the query is ahead of the database. fast-forward the database to catch-up.
    if (queryChromAfterDbRec(dbRec)) {
        string oldDbChrom(dbRec->getChrName());

        while (dbRec != NULL &&
                queryChromAfterDbRec(dbRec)) {
            _dbFRMs[dbIdx]->deleteRecord(dbRec);
            if (!nextRecord(false, dbIdx)) break;
            dbRec =  _currDbRecs[dbIdx];
            const string &newDbChrom = dbRec->getChrName();
            if (newDbChrom != oldDbChrom) {
                testChromOrder(dbRec);
                oldDbChrom = newDbChrom;
            }
        }
        clearCache(dbIdx);
        clearClosestEndPos(dbIdx);
        return false;
    }
    // the database is ahead of the query.
    else {
        // 1. scan the cache for remaining hits on the query's current chrom.
        if (wantScan) scanCache(dbIdx, retList);

        return true;
    }

    //control can't reach here, but compiler still wants a return statement.
    return true;
}