bool FileIntersect::processSortedFiles() { // use the chromsweep algorithm to detect overlaps on the fly. NewChromSweep sweep(_context); if (!sweep.init()) { return false; } if (!_recordOutputMgr->init(_context)) { return false; } RecordKeyList hitSet; while (sweep.next(hitSet)) { if (_context->getObeySplits()) { RecordKeyList keySet(hitSet.getKey()); RecordKeyList resultSet(hitSet.getKey()); _blockMgr->findBlockedOverlaps(keySet, hitSet, resultSet); processHits(resultSet); } else { processHits(hitSet); } } return true; }
// Convenience overload: performs a merge through the RecordKeyList overload
// using a temporary list and returns only the resulting key record.
//
// Every list member other than the key is deleted before returning.
// Returns NULL when the list overload reports that no record was available.
Record *FileRecordMgr::allocateAndGetNextMergedRecord(WANT_STRAND_TYPE desiredStrand, int maxDistance)
{
    RecordKeyList mergedSet;
    const bool gotRecord = allocateAndGetNextMergedRecord(mergedSet, desiredStrand, maxDistance);
    if (gotRecord) {
        deleteAllMergedItemsButKey(mergedSet);
        // The list exposes its key as const; callers of this overload need a
        // mutable record.
        return const_cast<Record *>(mergedSet.getKey());
    }
    return NULL;
}
bool FileMap::mapFiles() { NewChromSweep sweep(_context); if (!sweep.init()) { return false; } RecordKeyList hitSet; while (sweep.next(hitSet)) { if (_context->getObeySplits()) { RecordKeyList keySet(hitSet.getKey()); RecordKeyList resultSet(hitSet.getKey()); _blockMgr->findBlockedOverlaps(keySet, hitSet, resultSet); _recordOutputMgr->printRecord(resultSet.getKey(), _context->getColumnOpsVal(resultSet)); } else { _recordOutputMgr->printRecord(hitSet.getKey(), _context->getColumnOpsVal(hitSet)); } } return true; }
// Deletes every record held in recList's list except the one that is also the
// list's key, then empties the list. The key itself is left untouched.
void FileRecordMgr::deleteAllMergedItemsButKey(RecordKeyList &recList)
{
    RecordKeyList::const_iterator_type node = recList.begin();
    while (node != recList.end()) {
        // The key may itself be a member of the list; it must survive.
        if (node->value() != recList.getKey()) {
            deleteRecord(node->value());
        }
        node = recList.next();
    }
    recList.clearList();
}
bool Jaccard::getIntersectionAndUnion() { NewChromSweep sweep(_context); if (!sweep.init()) { return false; } RecordKeyList hitSet; while (sweep.next(hitSet)) { if (_context->getObeySplits()) { RecordKeyList keySet(hitSet.getKey()); RecordKeyList resultSet(hitSet.getKey()); _blockMgr->findBlockedOverlaps(keySet, hitSet, resultSet); _intersectionVal += getTotalIntersection(&resultSet); } else { _intersectionVal += getTotalIntersection(&hitSet); } } sweep.closeOut(); unsigned long queryUnion = sweep.getQueryTotalRecordLength(); unsigned long dbUnion = sweep.getDatabaseTotalRecordLength(); _unionVal = queryUnion + dbUnion; return true; }
bool Fisher::getFisher() { NewChromSweep sweep(_context); if (!sweep.init()) { return false; } RecordKeyList hitSet; while (sweep.next(hitSet)) { if (_context->getObeySplits()) { RecordKeyList keySet(hitSet.getKey()); RecordKeyList resultSet(hitSet.getKey()); _blockMgr->findBlockedOverlaps(keySet, hitSet, resultSet); _intersectionVal += getTotalIntersection(&resultSet); } else { _intersectionVal += getTotalIntersection(&hitSet); } } sweep.closeOut(); _queryLen = sweep.getQueryTotalRecordLength(); _dbLen = sweep.getDatabaseTotalRecordLength(); _unionVal = _queryLen + _dbLen; return true; }
// Releases everything a merged record list refers to: first the list members
// other than the key, then the key record itself. The list is left with an
// empty list and a NULL key.
void FileRecordMgr::deleteMergedRecord(RecordKeyList &recList)
{
    // Members first; this call deliberately skips the key, so the key is
    // deleted exactly once, below.
    deleteAllMergedItemsButKey(recList);

    deleteRecord(recList.getKey());
    recList.setKey(NULL);
}
bool FileRecordMgr::allocateAndGetNextMergedRecord(RecordKeyList & recList, WANT_STRAND_TYPE desiredStrand, int maxDistance) { if (!recList.allClear()) { deleteMergedRecord(recList); } _mustBeForward = desiredStrand == SAME_STRAND_FORWARD; _mustBeReverse = desiredStrand == SAME_STRAND_REVERSE; Record *startRecord = tryToTakeFromStorage(); // if we couldn't use a previously stored record for starters, //then begin with a new one that matches strand criteria. while (startRecord == NULL) { startRecord = allocateAndGetNextRecord(); if (startRecord == NULL) { //hit EOF!! return false; } if (_mustBeForward && !startRecord->getStrand()) { //record is reverse, wanted forward. addToStorage(startRecord); startRecord = NULL; } else if (_mustBeReverse && startRecord->getStrand()) { //record is forward, wanted reverse addToStorage(startRecord); startRecord = NULL; } } // OK!! We have a start record! _mustBeForward = desiredStrand == SAME_STRAND_FORWARD || (desiredStrand == SAME_STRAND_EITHER && startRecord->getStrand()); _mustBeReverse = desiredStrand == SAME_STRAND_REVERSE || (desiredStrand == SAME_STRAND_EITHER && !startRecord->getStrand()); const QuickString &currChrom = startRecord->getChrName(); _foundChroms.insert(currChrom); bool madeComposite = false; recList.push_back(startRecord); recList.setKey(startRecord); //key of recList will just be the startRecord unless we're able to merge more. bool currStrand = startRecord->getStrand(); bool mustMatchStrand = desiredStrand != ANY_STRAND; int currEnd = startRecord->getEndPos(); //now look for more records to merge with this one. //stop when they're out of range, not on the same chromosome, or we hit EOF. //ignore if they don't comply with strand. Record *nextRecord = NULL; while (nextRecord == NULL) { bool takenFromStorage = false; nextRecord = mustMatchStrand ? 
tryToTakeFromStorage(currStrand) : tryToTakeFromStorage(); if (nextRecord == NULL) { nextRecord = allocateAndGetNextRecord(); } else { takenFromStorage = true; } if (nextRecord == NULL) { // EOF hit break; } const QuickString &newChrom = nextRecord->getChrName(); if (newChrom != currChrom) { //hit a different chromosome. if (_foundChroms.find(newChrom) == _foundChroms.end() || takenFromStorage) { //haven't seen this chromosome before. addToStorage(nextRecord); break; } else { //different strand, but we've already seen this chrom. File is not sorted. fprintf(stderr, "ERROR: Input file %s is not sorted by chromosome, startPos.\n", _context->getInputFileName(_contextFileIdx).c_str()); deleteRecord(nextRecord); deleteMergedRecord(recList); exit(1); } } int nextStart = nextRecord->getStartPos(); //is the record out of range? if (nextStart > currEnd + maxDistance) { //yes, it's out of range. addToStorage(nextRecord); break; } //ok, they're on the same chrom and in range. Are we happy with the strand? if (mustMatchStrand && nextRecord->getStrand() != currStrand) { //no, we're not. addToStorage(nextRecord); nextRecord = NULL; continue; } //everything's good! do a merge. recList.push_back(nextRecord); madeComposite = true; int nextEnd = nextRecord->getEndPos(); if (nextEnd > currEnd) { currEnd = nextEnd; } nextRecord = NULL; } if (madeComposite) { Record *newKey = _recordMgr->allocateRecord(); (*newKey) = (*startRecord); newKey->setEndPos(currEnd); recList.setKey(newKey); } _totalMergedRecordLength += (unsigned long)(recList.getKey()->getEndPos() - recList.getKey()->getStartPos()); return true; }