示例#1
0
bool FileIntersect::processSortedFiles()
{
    // use the chromsweep algorithm to detect overlaps on the fly.
    NewChromSweep sweep(_context);

    if (!sweep.init()) {
    	return false;
    }
    if (!_recordOutputMgr->init(_context)) {
    	return false;
    }

    RecordKeyList hitSet;
    while (sweep.next(hitSet)) {
    	if (_context->getObeySplits()) {
    		RecordKeyList keySet(hitSet.getKey());
    		RecordKeyList resultSet(hitSet.getKey());
    		_blockMgr->findBlockedOverlaps(keySet, hitSet, resultSet);
    		processHits(resultSet);
    	} else {
    		processHits(hitSet);
    	}
    }
    return true;
}
示例#2
0
// Convenience overload: builds the next merged record through the list-based
// overload and hands back only its key.
//
// desiredStrand, maxDistance: forwarded unchanged to the list-based overload.
//
// Returns NULL when the list-based overload reports failure; otherwise returns
// the key of the merged list, after every other record in the list was deleted.
Record *FileRecordMgr::allocateAndGetNextMergedRecord(WANT_STRAND_TYPE desiredStrand, int maxDistance) {
	RecordKeyList mergedList;
	const bool gotRecord = allocateAndGetNextMergedRecord(mergedList, desiredStrand, maxDistance);
	if (gotRecord) {
		// Only the key leaves this function, so the remaining members are released here.
		deleteAllMergedItemsButKey(mergedList);
		// The list exposes its key as const; the caller wants a mutable record.
		return const_cast<Record *>(mergedList.getKey());
	}
	return NULL;
}
示例#3
0
// Deletes every record held in recList except the one that is also the list's
// key, then clears the list itself.
void FileRecordMgr::deleteAllMergedItemsButKey(RecordKeyList &recList) {
	RecordKeyList::const_iterator_type node = recList.begin();
	while (node != recList.end()) {
		// The key may itself be a member of the list; it has to survive this call.
		if (node->value() != recList.getKey()) {
			deleteRecord(node->value());
		}
		node = recList.next();
	}
	recList.clearList();
}
示例#4
0
bool FileMap::mapFiles()
{
    NewChromSweep sweep(_context);
    if (!sweep.init()) {
      return false;
    }
    RecordKeyList hitSet;
    while (sweep.next(hitSet)) {
    	if (_context->getObeySplits()) {
			RecordKeyList keySet(hitSet.getKey());
			RecordKeyList resultSet(hitSet.getKey());
			_blockMgr->findBlockedOverlaps(keySet, hitSet, resultSet);
			_recordOutputMgr->printRecord(resultSet.getKey(), _context->getColumnOpsVal(resultSet));
    	} else {
			_recordOutputMgr->printRecord(hitSet.getKey(), _context->getColumnOpsVal(hitSet));
		}
    }
    return true;
}
示例#5
0
// Advances the sweep by one query record and reports the database records that
// intersect it.
//
// next: on success its key is set to the current query record and its list is
//       set (without copying) to the hits collected for that record.
//
// Returns false when no query record is left, or when there is neither a
// current database record nor a cached one and _runToQueryEnd is not set.
// Returns true otherwise.
bool NewChromSweep::next(RecordKeyList &next) {
	// Release the query record left over from the previous call before
	// asking for a new one.
	if (_currQueryRec != NULL) {
		_queryFRM->deleteRecord(_currQueryRec);
	}
	nextRecord(true);
	if (_currQueryRec == NULL) {
		// End of the query input.
		return false;
	}

	// Nothing pending on the database side and nothing cached: only keep
	// going if the sweep has to run to the end of the query.
	const bool databaseDrained = (_currDatabaseRec == NULL) && _cache.empty();
	if (databaseDrained && !_runToQueryEnd) {
		return false;
	}

	_hits.clear();
	_currChromName = _currQueryRec->getChrName();

	// The scan below only runs while the sweep stays on the same chromosome.
	if (!chromChange()) {
		// Look for hits among the cached database records first.
		scanCache();

		// Then advance the database until it is ahead of the query (or ends,
		// or leaves the query's chromosome), updating hits and cache on the way.
		while (_currDatabaseRec != NULL
				&& _currQueryRec->sameChrom(_currDatabaseRec)
				&& !_currDatabaseRec->after(_currQueryRec)) {
			if (intersects(_currQueryRec, _currDatabaseRec)) {
				_hits.push_back(_currDatabaseRec);
			}

			if (_currQueryRec->after(_currDatabaseRec)) {
				// The query is already past this database record: discard it.
				_databaseFRM->deleteRecord(_currDatabaseRec);
			} else {
				// Otherwise keep the record in the cache.
				_cache.push_back(_currDatabaseRec);
			}
			// Either way the current slot is emptied before the next record is requested.
			_currDatabaseRec = NULL;
			nextRecord(false);
		}
	}

	next.setKey(_currQueryRec);
	next.setListNoCopy(_hits);
	return true;
}
示例#6
0
bool Jaccard::getIntersectionAndUnion() {
	NewChromSweep sweep(_context);
	if (!sweep.init()) {
		return false;
	}
	RecordKeyList hitSet;
	while (sweep.next(hitSet)) {
		if (_context->getObeySplits()) {
			RecordKeyList keySet(hitSet.getKey());
			RecordKeyList resultSet(hitSet.getKey());
			_blockMgr->findBlockedOverlaps(keySet, hitSet, resultSet);
			_intersectionVal += getTotalIntersection(&resultSet);
		} else {
			_intersectionVal += getTotalIntersection(&hitSet);
		}
	}

	sweep.closeOut();
	unsigned long queryUnion = sweep.getQueryTotalRecordLength();
	unsigned long dbUnion = sweep.getDatabaseTotalRecordLength();

	_unionVal = queryUnion + dbUnion;
	return true;
}
示例#7
0
bool Fisher::getFisher() {
	NewChromSweep sweep(_context);
	if (!sweep.init()) {
		return false;
	}
	RecordKeyList hitSet;
	while (sweep.next(hitSet)) {
		if (_context->getObeySplits()) {
			RecordKeyList keySet(hitSet.getKey());
			RecordKeyList resultSet(hitSet.getKey());
			_blockMgr->findBlockedOverlaps(keySet, hitSet, resultSet);
			_intersectionVal += getTotalIntersection(&resultSet);
		} else {
			_intersectionVal += getTotalIntersection(&hitSet);
		}
	}

	sweep.closeOut();
	_queryLen = sweep.getQueryTotalRecordLength();
	_dbLen = sweep.getDatabaseTotalRecordLength();

	_unionVal = _queryLen + _dbLen;
	return true;
}
示例#8
0
// Releases everything a merged record list refers to: all list members, then
// the key record, and finally resets the list's key to NULL.
void FileRecordMgr::deleteMergedRecord(RecordKeyList &recList)
{
	// The key is left alone by deleteAllMergedItemsButKey(), so it is
	// deleted separately once the other members are gone.
	const Record *mergedKey = recList.getKey();

	deleteAllMergedItemsButKey(recList);
	deleteRecord(mergedKey);

	// The list must not keep pointing at the record that was just deleted.
	recList.setKey(NULL);
}
示例#9
0
// Builds the next merged record: picks a start record (from storage if one is
// available, otherwise from the input) and merges into it every following record
// that is on the same chromosome, starts no further than maxDistance past the
// current merged end, and complies with the strand requirement.
//
// recList        in/out. If it is not all clear on entry, its previous contents are
//                released with deleteMergedRecord(). On success it holds the start
//                record plus every merged record. Its key is the start record itself
//                when nothing was merged, or a newly allocated copy of the start
//                record whose end position is set to the merged end.
// desiredStrand  SAME_STRAND_FORWARD / SAME_STRAND_REVERSE: the start record must be
//                on that strand (non-matching candidates are put into storage).
//                SAME_STRAND_EITHER: any start record; merged records must share its strand.
//                ANY_STRAND: strand is not compared when merging.
// maxDistance    largest allowed distance between the current merged end and the
//                start of the next record for that record to be merged.
//
// Returns false if the input ends before a start record is found, true otherwise.
// On success the length of the key (end - start) is added to _totalMergedRecordLength.
//
// Terminates the process with exit(1), after printing an error to stderr, when a
// record freshly read from the input is on a chromosome that differs from the
// current one but has already been seen (input not sorted).
bool FileRecordMgr::allocateAndGetNextMergedRecord(RecordKeyList & recList, WANT_STRAND_TYPE desiredStrand, int maxDistance)
{
	// A list still holding an earlier merge result is released before reuse.
	if (!recList.allClear()) {
		deleteMergedRecord(recList);
	}

	_mustBeForward = desiredStrand == SAME_STRAND_FORWARD;
	_mustBeReverse = desiredStrand == SAME_STRAND_REVERSE;

	Record *startRecord = tryToTakeFromStorage();

	// if we couldn't use a previously stored record for starters,
	// then begin with a new one that matches strand criteria.
	while (startRecord == NULL) {
		startRecord = allocateAndGetNextRecord();
		if (startRecord == NULL) { //hit EOF!!
			return false;
		}

		if (_mustBeForward && !startRecord->getStrand()) {
			//record is reverse, wanted forward.
			addToStorage(startRecord);
			startRecord = NULL;
		} else if (_mustBeReverse && startRecord->getStrand()) {
			//record is forward, wanted reverse
			addToStorage(startRecord);
			startRecord = NULL;
		}
	}

	// OK!! We have a start record!

	// With SAME_STRAND_EITHER the strand requirement is now fixed to the
	// strand of the start record.
	_mustBeForward = desiredStrand == SAME_STRAND_FORWARD || (desiredStrand == SAME_STRAND_EITHER && startRecord->getStrand());
	_mustBeReverse = desiredStrand == SAME_STRAND_REVERSE || (desiredStrand == SAME_STRAND_EITHER && !startRecord->getStrand());

	const QuickString &currChrom = startRecord->getChrName();
	_foundChroms.insert(currChrom);

	bool madeComposite = false;
	recList.push_back(startRecord);
	recList.setKey(startRecord); //key of recList will just be the startRecord unless we're able to merge more.

	bool currStrand = startRecord->getStrand();
	bool mustMatchStrand = desiredStrand != ANY_STRAND;

	int currEnd = startRecord->getEndPos();
	//now look for more records to merge with this one.
	//stop when they're out of range, not on the same chromosome, or we hit EOF.
	//ignore if they don't comply with strand.
	Record *nextRecord = NULL;
	while (nextRecord == NULL) {
		bool takenFromStorage = false;
		nextRecord = mustMatchStrand ? tryToTakeFromStorage(currStrand) : tryToTakeFromStorage();
		if (nextRecord == NULL) {
			nextRecord = allocateAndGetNextRecord();
		} else {
			takenFromStorage = true;
		}
		if (nextRecord == NULL) { // EOF hit
			break;
		}
		const QuickString &newChrom = nextRecord->getChrName();
		if (newChrom != currChrom) { //hit a different chromosome.
			if (_foundChroms.find(newChrom) == _foundChroms.end() || takenFromStorage) {
				//haven't seen this chromosome before (or the record came back out of storage).
				//keep it for a later call and stop merging.
				addToStorage(nextRecord);
				break;
			} else {
				//different chromosome, but we've already seen this chrom. File is not sorted.
				fprintf(stderr, "ERROR: Input file %s is not sorted by chromosome, startPos.\n", _context->getInputFileName(_contextFileIdx).c_str());
				deleteRecord(nextRecord);
				deleteMergedRecord(recList);
				exit(1);
			}
		}
		int nextStart = nextRecord->getStartPos();
		//is the record out of range?
		//The limit is computed in 64-bit arithmetic: currEnd + maxDistance as plain int
		//overflows (undefined behavior) when maxDistance is very large.
		const long long mergeLimit = static_cast<long long>(currEnd) + maxDistance;
		if (nextStart > mergeLimit) {
			//yes, it's out of range.
			addToStorage(nextRecord);
			break;
		}

		//ok, they're on the same chrom and in range. Are we happy with the strand?
		if (mustMatchStrand && nextRecord->getStrand() != currStrand) {
			//no, we're not. Put it aside and keep looking.
			addToStorage(nextRecord);
			nextRecord = NULL;
			continue;
		}
		//everything's good! do a merge.
		recList.push_back(nextRecord);
		madeComposite = true;
		int nextEnd = nextRecord->getEndPos();
		if (nextEnd > currEnd) {
			currEnd = nextEnd;
		}
		nextRecord = NULL;
	}
	if (madeComposite) {
		//the key becomes a separate record: a copy of the start record stretched to the merged end.
		Record *newKey = _recordMgr->allocateRecord();
		(*newKey) = (*startRecord);
		newKey->setEndPos(currEnd);
		recList.setKey(newKey);
	}
	_totalMergedRecordLength += static_cast<unsigned long>(recList.getKey()->getEndPos() - recList.getKey()->getStartPos());
	return true;
}