Пример #1
0
/**
 * Produce the next merged record.
 *
 * A start record is obtained first: whatever tryToTakeFromStorage() hands back, or else the next
 * record read through FileRecordMgr::getNextRecord() that is compatible with _desiredStrand
 * (incompatible candidates read from the file are deleted). Following records are then absorbed
 * while they are on the same chromosome, start no further than _maxDistance past the current
 * merged end, and satisfy the strand requirement.
 *
 * @param recList Optional output list. When non-NULL it is first emptied with deleteMergedRecord()
 *                if it is not allClear(); it then receives the start record and every absorbed
 *                record, and its key is set to the record that is returned. When NULL, records
 *                that are absorbed into a composite are released with deleteRecord() because no
 *                one else holds a pointer to them.
 * @return NULL if EOF is reached before a start record is found; the start record itself if
 *         nothing was merged into it; otherwise a record newly obtained from
 *         _recordMgr->allocateRecord(), assigned from the start record and with its end position
 *         set to the merged end.
 */
Record *FileRecordMergeMgr::getNextRecord(RecordKeyVector *recList)
{
	//clear the recList if there is one, and if it has records
	// in it.
	if (recList != NULL && !recList->allClear()) {
		deleteMergedRecord(*recList);
	}

	// For choosing the start record only an explicit forward/reverse request restricts the strand.
	_mustBeForward = _desiredStrand == SAME_STRAND_FORWARD;
	_mustBeReverse = _desiredStrand == SAME_STRAND_REVERSE;

	Record *startRecord = tryToTakeFromStorage();

	// if we couldn't use a previously stored record for starters,
	//then begin with a new one that matches strand criteria.
	while (startRecord == NULL) {
		startRecord = FileRecordMgr::getNextRecord();
		if (startRecord == NULL) { //hit EOF!!
			return NULL;
		}

		if ((_mustBeForward && (startRecord->getStrandVal() != Record::FORWARD)) || (_mustBeReverse && (startRecord->getStrandVal() != Record::REVERSE))) {
			//record is reverse, only want forward, OR record is forward, wanted reverse
			deleteRecord(startRecord);
			startRecord = NULL;
			continue;
		}
		if (startRecord->getStrandVal() == Record::UNKNOWN && _desiredStrand != ANY_STRAND) {
			//there is an unknown strand, but the user specified strandedness.
			deleteRecord(startRecord);
			startRecord = NULL;
		}
	}

	// OK!! We have a start record! Re-evaluate strand requirements for the next record:
	// with SAME_STRAND_EITHER the start record's own strand now dictates what may be merged.

	_mustBeForward = _desiredStrand == SAME_STRAND_FORWARD || (_desiredStrand == SAME_STRAND_EITHER && (startRecord->getStrandVal() == Record::FORWARD));
	_mustBeReverse = _desiredStrand == SAME_STRAND_REVERSE || (_desiredStrand == SAME_STRAND_EITHER && (startRecord->getStrandVal() == Record::REVERSE));
	// With SAME_STRAND_EITHER, opposite-strand records are put in storage instead of being deleted.
	bool mustKeepOpposite = (_desiredStrand == SAME_STRAND_EITHER);

	// currChrom refers into startRecord, so startRecord must stay alive until the scan is finished.
	const QuickString &currChrom = startRecord->getChrName();
	_foundChroms.insert(currChrom);

	bool madeComposite = false;
	if (recList != NULL) {
		recList->push_back(startRecord);
		recList->setKey(startRecord); //key of recList will just be the startRecord unless we're able to merge more.
	}

	Record::strandType currStrand = startRecord->getStrandVal();
	bool mustMatchStrand = _desiredStrand != ANY_STRAND;

	int currEnd = startRecord->getEndPos();
	//now look for more records to merge with this one.
	//stop when they're out of range, not on the same chromosome, or we hit EOF.
	//ignore if they don't comply with strand.
	Record *nextRecord = NULL;
	while (nextRecord == NULL) {
		bool takenFromStorage = false;
		nextRecord = mustMatchStrand ? tryToTakeFromStorage(currStrand) : tryToTakeFromStorage();
		if (nextRecord == NULL) {
			nextRecord = FileRecordMgr::getNextRecord();
		} else {
			takenFromStorage = true;
		}
		if (nextRecord == NULL) { // EOF hit
			break;
		}
		//delete any record from file with an unknown strand if we are doing stranded merge, but first check
		//that its chrom was the same and it's not out of range. If either is true, stop scanning.
		bool mustDelete = (mustMatchStrand && nextRecord->getStrandVal() == Record::UNKNOWN);

		//check that we are still on the same chromosome.
		const QuickString &newChrom = nextRecord->getChrName();
		if (newChrom != currChrom) { //hit a different chromosome.
			//haven't seen this chromosome before, sort order is already enforced in the base class method.
			if (!mustDelete) {
				addToStorage(nextRecord);
			} else {
				deleteRecord(nextRecord);
			}
			nextRecord = NULL;
			break;
		}

		//check whether it's in range
		int nextStart = nextRecord->getStartPos();
		if (nextStart > currEnd + _maxDistance) {
			//no, it's out of range.
			if (!mustDelete) {
				addToStorage(nextRecord);
			} else {
				deleteRecord(nextRecord);
			}
			nextRecord = NULL;
			break;
		}

		// NOW, going back, we can delete any unknown strand records. But don't stop scanning.
		if (mustDelete) {
			deleteRecord(nextRecord);
			nextRecord = NULL;
			continue;
		}
		//if taken from file, and wrong strand, store or delete.
		if (!takenFromStorage && ((_mustBeForward && (nextRecord->getStrandVal() != Record::FORWARD)) || (_mustBeReverse && (nextRecord->getStrandVal() != Record::REVERSE)))) {
			if (mustKeepOpposite) {
				addToStorage(nextRecord);
			} else {
				deleteRecord(nextRecord);
			}
			nextRecord = NULL;
			continue; //get the next record
		}
		//ok, they're on the same chrom and in range, and the strand is good. Do a merge.
		madeComposite = true;
		int nextEnd = nextRecord->getEndPos();
		if (nextEnd > currEnd) {
			currEnd = nextEnd;
		}
		if (recList != NULL) {
			recList->push_back(nextRecord);
		} else {
			// Nobody keeps a pointer to the absorbed record when there is no list, so
			// release it here instead of dropping it on the floor.
			deleteRecord(nextRecord);
		}
		nextRecord = NULL;
	}
	if (madeComposite) {
		Record *newKey = _recordMgr->allocateRecord();
		(*newKey) = (*startRecord);
		newKey->setEndPos(currEnd);
		if (recList != NULL) {
			recList->setKey(newKey);
		} else {
			// Without a list the start record is no longer reachable once newKey is returned.
			// NOTE(review): assumes Record assignment leaves newKey independent of startRecord
			// (the list path keeps both alive as separate records) -- confirm.
			deleteRecord(startRecord);
		}
		_totalMergedRecordLength += currEnd - newKey->getStartPos();
		return newKey;
	}
	_totalMergedRecordLength += currEnd - startRecord->getStartPos();
	return startRecord;
}
Пример #2
0
/**
 * Build the next merged record and place it, together with its constituent records, in recList.
 *
 * A start record is chosen (from tryToTakeFromStorage() or, failing that, from
 * allocateAndGetNextRecord()), then subsequent records are appended while they are on the same
 * chromosome, start no further than maxDistance past the current merged end, and -- unless
 * desiredStrand is ANY_STRAND -- share the start record's strand.
 *
 * @param recList       Output list. Emptied with deleteMergedRecord() on entry if it is not
 *                      allClear(). On success it holds the start record plus every merged record;
 *                      its key is the start record, or a newly allocated copy of it with an
 *                      extended end position when at least one record was merged.
 * @param desiredStrand Strand constraint for the merge.
 * @param maxDistance   Largest allowed gap between the current merged end and the next start.
 * @return false if EOF is reached before a start record is found, true otherwise.
 *
 * Prints an error and calls exit(1) if a record read from the file is on a chromosome that was
 * already recorded in _foundChroms and differs from the current one.
 */
bool FileRecordMgr::allocateAndGetNextMergedRecord(RecordKeyList & recList, WANT_STRAND_TYPE desiredStrand, int maxDistance)
{
	// Release whatever the previous call left in the list.
	if (!recList.allClear()) {
		deleteMergedRecord(recList);
	}

	// For choosing the start record only an explicit forward/reverse request restricts the strand.
	_mustBeForward = desiredStrand == SAME_STRAND_FORWARD;
	_mustBeReverse = desiredStrand == SAME_STRAND_REVERSE;

	Record *startRecord = tryToTakeFromStorage();

	// if we couldn't use a previously stored record for starters,
	//then begin with a new one that matches strand criteria.
	// NOTE(review): rejected candidates are put into storage rather than deleted. Whether a later
	// tryToTakeFromStorage() can hand them back as a start record without a strand check depends
	// on that helper, which is not visible here -- confirm.
	while (startRecord == NULL) {
		startRecord = allocateAndGetNextRecord();
		if (startRecord == NULL) { //hit EOF!!
			return false;
		}

		if (_mustBeForward && !startRecord->getStrand()) {
			//record is reverse, wanted forward.
			addToStorage(startRecord);
			startRecord = NULL;
		} else if (_mustBeReverse && startRecord->getStrand()) {
			//record is forward, wanted reverse
			addToStorage(startRecord);
			startRecord = NULL;
		}
	}

	// OK!! We have a start record!
	// With SAME_STRAND_EITHER the start record's own strand now determines the required strand.

	_mustBeForward = desiredStrand == SAME_STRAND_FORWARD || (desiredStrand == SAME_STRAND_EITHER && startRecord->getStrand());
	_mustBeReverse = desiredStrand == SAME_STRAND_REVERSE || (desiredStrand == SAME_STRAND_EITHER && !startRecord->getStrand());

	// Remember the chromosome so that a later return to it can be reported as a sort error.
	const QuickString &currChrom = startRecord->getChrName();
	_foundChroms.insert(currChrom);

	bool madeComposite = false;
	recList.push_back(startRecord);
	recList.setKey(startRecord); //key of recList will just be the startRecord unless we're able to merge more.

	bool currStrand = startRecord->getStrand();
	bool mustMatchStrand = desiredStrand != ANY_STRAND;

	// Running end of the merged interval; grows as overlapping/nearby records are absorbed.
	int currEnd = startRecord->getEndPos();
	//now look for more records to merge with this one.
	//stop when they're out of range, not on the same chromosome, or we hit EOF.
	//ignore if they don't comply with strand.
	Record *nextRecord = NULL;
	while (nextRecord == NULL) {
		bool takenFromStorage = false;
		// Prefer a stored record (of the matching strand when strand matters) over reading the file.
		nextRecord = mustMatchStrand ? tryToTakeFromStorage(currStrand) : tryToTakeFromStorage();
		if (nextRecord == NULL) {
			nextRecord = allocateAndGetNextRecord();
		} else {
			takenFromStorage = true;
		}
		if (nextRecord == NULL) { // EOF hit
			break;
		}
		const QuickString &newChrom = nextRecord->getChrName();
		if (newChrom != currChrom) { //hit a different chromosome.
			if (_foundChroms.find(newChrom) == _foundChroms.end() || takenFromStorage) {
				//haven't seen this chromosome before, or the record came back out of storage:
				//keep it for a later call and stop merging.
				addToStorage(nextRecord);
				break;
			} else {
				//different chromosome, but we've already seen this chrom. File is not sorted.
				fprintf(stderr, "ERROR: Input file %s is not sorted by chromosome, startPos.\n", _context->getInputFileName(_contextFileIdx).c_str());
				deleteRecord(nextRecord);
				deleteMergedRecord(recList);
				exit(1);
			}
		}
		int nextStart = nextRecord->getStartPos();
		//is the record out of range?
		if (nextStart > currEnd + maxDistance) {
			//yes, it's out of range. Keep it for a later call and stop merging.
			addToStorage(nextRecord);
			break;
		}

		//ok, they're on the same chrom and in range. Are we happy with the strand?
		if (mustMatchStrand && nextRecord->getStrand() != currStrand) {
			//no, we're not. Store it and keep scanning for further records.
			addToStorage(nextRecord);
			nextRecord = NULL;
			continue;
		}
		//everything's good! do a merge.
		recList.push_back(nextRecord);
		madeComposite = true;
		int nextEnd = nextRecord->getEndPos();
		if (nextEnd > currEnd) {
			currEnd = nextEnd;
		}
		// Reset so the loop condition lets us look at the following record.
		nextRecord = NULL;
	}
	if (madeComposite) {
		// The key becomes a separate record: a copy of the start record stretched to the merged end.
		Record *newKey = _recordMgr->allocateRecord();
		(*newKey) = (*startRecord);
		newKey->setEndPos(currEnd);
		recList.setKey(newKey);
	}
	// Accumulate the length (end - start) of the key record that was just produced.
	_totalMergedRecordLength += (unsigned long)(recList.getKey()->getEndPos() - recList.getKey()->getStartPos());
	return true;
}