bool CloseSweep::chromChange(int dbIdx, RecordKeyVector &retList, bool wantScan) { Record *dbRec = _currDbRecs[dbIdx]; bool haveQuery = _currQueryRec != NULL; bool haveDB = dbRec != NULL; if (haveQuery && _currQueryChromName != _prevQueryChromName) { _context->testNameConventions(_currQueryRec); testChromOrder(_currQueryRec); } if (haveDB) { _context->testNameConventions(dbRec); testChromOrder(dbRec); } // the files are on the same chrom if (haveQuery && (!haveDB || _currQueryRec->sameChrom(dbRec))) { //if this is the first time the query's chrom is ahead of the chrom that was in this cache, //then we have to clear the cache. if (!_caches[dbIdx].empty() && queryChromAfterDbRec(_caches[dbIdx].begin()->value())) { clearCache(dbIdx); clearClosestEndPos(dbIdx); } return false; } if (!haveQuery || !haveDB) return false; if (!_caches[dbIdx].empty() && (_caches[dbIdx].begin()->value()->sameChrom(_currQueryRec))) { //the newest DB record's chrom is ahead of the query, but the cache still //has old records on that query's chrom scanCache(dbIdx, retList); finalizeSelections(dbIdx, retList); return true; } // the query is ahead of the database. fast-forward the database to catch-up. if (queryChromAfterDbRec(dbRec)) { string oldDbChrom(dbRec->getChrName()); while (dbRec != NULL && queryChromAfterDbRec(dbRec)) { _dbFRMs[dbIdx]->deleteRecord(dbRec); if (!nextRecord(false, dbIdx)) break; dbRec = _currDbRecs[dbIdx]; const string &newDbChrom = dbRec->getChrName(); if (newDbChrom != oldDbChrom) { testChromOrder(dbRec); oldDbChrom = newDbChrom; } } clearCache(dbIdx); clearClosestEndPos(dbIdx); return false; } // the database is ahead of the query. else { // 1. scan the cache for remaining hits on the query's current chrom. if (wantScan) scanCache(dbIdx, retList); return true; } //control can't reach here, but compiler still wants a return statement. return true; }
// Build and return the next merged record.
//
// A start record is chosen first (from storage if one is available, otherwise
// from the file), skipping records that violate the strand requirement held in
// _desiredStrand. Following records are then folded into it for as long as
// they are on the same chromosome and start no further than _maxDistance past
// the current merged end.
//
// recList - optional. When non-NULL it is cleared of the previous merge (via
//           deleteMergedRecord) if it is not already clear, then filled with
//           every record that took part in this merge; its key is set to the
//           returned record. When NULL, the constituent records are released
//           here because no list takes ownership of them.
//
// Returns NULL at end of file. Otherwise returns the start record itself when
// nothing was merged into it, or a newly allocated copy of the start record
// whose end position is the merged end.
Record *FileRecordMergeMgr::getNextRecord(RecordKeyVector *recList)
{
    // Clear the recList if there is one, and if it has records in it.
    if (recList != NULL && !recList->allClear()) {
        deleteMergedRecord(*recList);
    }

    _mustBeForward = _desiredStrand == SAME_STRAND_FORWARD;
    _mustBeReverse = _desiredStrand == SAME_STRAND_REVERSE;

    Record *startRecord = tryToTakeFromStorage();

    // If we couldn't use a previously stored record for starters,
    // then begin with a new one that matches strand criteria.
    while (startRecord == NULL) {
        startRecord = FileRecordMgr::getNextRecord();
        if (startRecord == NULL) { // hit EOF
            return NULL;
        }

        if ((_mustBeForward && (startRecord->getStrandVal() != Record::FORWARD))
                || (_mustBeReverse && (startRecord->getStrandVal() != Record::REVERSE))) {
            // Record is reverse but only forward is wanted, or vice versa.
            deleteRecord(startRecord);
            startRecord = NULL;
            continue;
        }
        if (startRecord->getStrandVal() == Record::UNKNOWN && _desiredStrand != ANY_STRAND) {
            // There is an unknown strand, but the user specified strandedness.
            deleteRecord(startRecord);
            startRecord = NULL;
        }
    }

    // We have a start record. Re-evaluate strand requirements for the next
    // record: with SAME_STRAND_EITHER the start record dictates the strand.
    _mustBeForward = _desiredStrand == SAME_STRAND_FORWARD
            || (_desiredStrand == SAME_STRAND_EITHER && (startRecord->getStrandVal() == Record::FORWARD));
    _mustBeReverse = _desiredStrand == SAME_STRAND_REVERSE
            || (_desiredStrand == SAME_STRAND_EITHER && (startRecord->getStrandVal() == Record::REVERSE));
    bool mustKeepOpposite = (_desiredStrand == SAME_STRAND_EITHER);

    // NOTE: currChrom refers into startRecord, so startRecord must stay alive
    // until the merge loop below has finished.
    const QuickString &currChrom = startRecord->getChrName();
    _foundChroms.insert(currChrom);

    bool madeComposite = false;
    if (recList != NULL) {
        recList->push_back(startRecord);
        // Key of recList will just be the startRecord unless we're able to merge more.
        recList->setKey(startRecord);
    }

    Record::strandType currStrand = startRecord->getStrandVal();
    bool mustMatchStrand = _desiredStrand != ANY_STRAND;

    int currEnd = startRecord->getEndPos();

    // Now look for more records to merge with this one.
    // Stop when they're out of range, not on the same chromosome, or we hit EOF.
    // Ignore them if they don't comply with strand.
    Record *nextRecord = NULL;
    while (nextRecord == NULL) {
        bool takenFromStorage = false;
        nextRecord = mustMatchStrand ? tryToTakeFromStorage(currStrand) : tryToTakeFromStorage();
        if (nextRecord == NULL) {
            nextRecord = FileRecordMgr::getNextRecord();
        } else {
            takenFromStorage = true;
        }
        if (nextRecord == NULL) { // EOF hit
            break;
        }

        // Delete any record with an unknown strand if we are doing a stranded
        // merge, but first check that its chrom is the same and that it is
        // not out of range. If either check fails, stop scanning.
        bool mustDelete = (mustMatchStrand && nextRecord->getStrandVal() == Record::UNKNOWN);

        // Check that we are still on the same chromosome.
        const QuickString &newChrom = nextRecord->getChrName();
        if (newChrom != currChrom) {
            // Hit a different chromosome. Per the original author's note, sort
            // order is already enforced in the base class method.
            if (!mustDelete) {
                addToStorage(nextRecord);
            } else {
                deleteRecord(nextRecord);
            }
            nextRecord = NULL;
            break;
        }

        // Check whether it's in range.
        int nextStart = nextRecord->getStartPos();
        if (nextStart > currEnd + _maxDistance) {
            // No, it's out of range.
            if (!mustDelete) {
                addToStorage(nextRecord);
            } else {
                deleteRecord(nextRecord);
            }
            nextRecord = NULL;
            break;
        }

        // Now we can delete any unknown-strand record, without stopping the scan.
        if (mustDelete) {
            deleteRecord(nextRecord);
            nextRecord = NULL;
            continue;
        }

        // If taken from file, and wrong strand, store or delete.
        if (!takenFromStorage
                && ((_mustBeForward && (nextRecord->getStrandVal() != Record::FORWARD))
                    || (_mustBeReverse && (nextRecord->getStrandVal() != Record::REVERSE)))) {
            if (mustKeepOpposite) {
                addToStorage(nextRecord);
            } else {
                deleteRecord(nextRecord);
            }
            nextRecord = NULL;
            continue; // get the next record
        }

        // Same chrom, in range, and the strand is good: do a merge.
        madeComposite = true;
        int nextEnd = nextRecord->getEndPos();
        if (nextEnd > currEnd) {
            currEnd = nextEnd;
        }
        if (recList != NULL) {
            recList->push_back(nextRecord);
        } else {
            // No list takes ownership of the merged-in record, and nothing
            // else references it, so release it here instead of leaking it.
            deleteRecord(nextRecord);
        }
        nextRecord = NULL;
    }

    if (!madeComposite) {
        _totalMergedRecordLength += currEnd - startRecord->getStartPos();
        return startRecord;
    }

    // Several records were merged: return a copy of the start record that
    // spans the whole merged interval.
    Record *newKey = _recordMgr->allocateRecord();
    (*newKey) = (*startRecord);
    newKey->setEndPos(currEnd);
    if (recList != NULL) {
        recList->setKey(newKey);
    } else {
        // The start record lives on only as the copy in newKey; without a
        // list to hold it, it would otherwise be unreachable.
        deleteRecord(startRecord);
    }
    _totalMergedRecordLength += currEnd - newKey->getStartPos();
    return newKey;
}
bool FileRecordMgr::allocateAndGetNextMergedRecord(RecordKeyList & recList, WANT_STRAND_TYPE desiredStrand, int maxDistance) { if (!recList.allClear()) { deleteMergedRecord(recList); } _mustBeForward = desiredStrand == SAME_STRAND_FORWARD; _mustBeReverse = desiredStrand == SAME_STRAND_REVERSE; Record *startRecord = tryToTakeFromStorage(); // if we couldn't use a previously stored record for starters, //then begin with a new one that matches strand criteria. while (startRecord == NULL) { startRecord = allocateAndGetNextRecord(); if (startRecord == NULL) { //hit EOF!! return false; } if (_mustBeForward && !startRecord->getStrand()) { //record is reverse, wanted forward. addToStorage(startRecord); startRecord = NULL; } else if (_mustBeReverse && startRecord->getStrand()) { //record is forward, wanted reverse addToStorage(startRecord); startRecord = NULL; } } // OK!! We have a start record! _mustBeForward = desiredStrand == SAME_STRAND_FORWARD || (desiredStrand == SAME_STRAND_EITHER && startRecord->getStrand()); _mustBeReverse = desiredStrand == SAME_STRAND_REVERSE || (desiredStrand == SAME_STRAND_EITHER && !startRecord->getStrand()); const QuickString &currChrom = startRecord->getChrName(); _foundChroms.insert(currChrom); bool madeComposite = false; recList.push_back(startRecord); recList.setKey(startRecord); //key of recList will just be the startRecord unless we're able to merge more. bool currStrand = startRecord->getStrand(); bool mustMatchStrand = desiredStrand != ANY_STRAND; int currEnd = startRecord->getEndPos(); //now look for more records to merge with this one. //stop when they're out of range, not on the same chromosome, or we hit EOF. //ignore if they don't comply with strand. Record *nextRecord = NULL; while (nextRecord == NULL) { bool takenFromStorage = false; nextRecord = mustMatchStrand ? 
tryToTakeFromStorage(currStrand) : tryToTakeFromStorage(); if (nextRecord == NULL) { nextRecord = allocateAndGetNextRecord(); } else { takenFromStorage = true; } if (nextRecord == NULL) { // EOF hit break; } const QuickString &newChrom = nextRecord->getChrName(); if (newChrom != currChrom) { //hit a different chromosome. if (_foundChroms.find(newChrom) == _foundChroms.end() || takenFromStorage) { //haven't seen this chromosome before. addToStorage(nextRecord); break; } else { //different strand, but we've already seen this chrom. File is not sorted. fprintf(stderr, "ERROR: Input file %s is not sorted by chromosome, startPos.\n", _context->getInputFileName(_contextFileIdx).c_str()); deleteRecord(nextRecord); deleteMergedRecord(recList); exit(1); } } int nextStart = nextRecord->getStartPos(); //is the record out of range? if (nextStart > currEnd + maxDistance) { //yes, it's out of range. addToStorage(nextRecord); break; } //ok, they're on the same chrom and in range. Are we happy with the strand? if (mustMatchStrand && nextRecord->getStrand() != currStrand) { //no, we're not. addToStorage(nextRecord); nextRecord = NULL; continue; } //everything's good! do a merge. recList.push_back(nextRecord); madeComposite = true; int nextEnd = nextRecord->getEndPos(); if (nextEnd > currEnd) { currEnd = nextEnd; } nextRecord = NULL; } if (madeComposite) { Record *newKey = _recordMgr->allocateRecord(); (*newKey) = (*startRecord); newKey->setEndPos(currEnd); recList.setKey(newKey); } _totalMergedRecordLength += (unsigned long)(recList.getKey()->getEndPos() - recList.getKey()->getStartPos()); return true; }