コード例 #1
0
ファイル: fisher.cpp プロジェクト: arq5x/bedtools2
/**
 * Adds up the bases shared between the key record of recList and each hit.
 *
 * Side effects: _overlapCounts and _numIntersections both grow by the number
 * of hits, and _qsizes receives the key length followed by one clipped
 * overlap length per hit.
 *
 * @param recList key record together with the hits to measure against it
 * @return total shared bases; read from the split-block info when the context
 *         obeys splits, otherwise the sum of the clipped interval lengths
 */
unsigned long Fisher::getTotalIntersection(RecordKeyVector &recList)
{
    Record *query = recList.getKey();
    const CHRPOS queryStart = query->getStartPos();
    const CHRPOS queryEnd = query->getEndPos();

    _overlapCounts += recList.size();
    // Sizes are stored as int, so very large lengths are truncated on purpose.
    _qsizes.push_back(static_cast<int>(queryEnd - queryStart));

    unsigned long totalBases = 0;
    int splitIdx = 0; // index of the current hit in the split-block overlap list
    RecordKeyVector::iterator_type hit = recList.begin();
    while (hit != recList.end()) {
        // Clip the hit to the extent of the query record.
        const CHRPOS overlapStart = max((*hit)->getStartPos(), queryStart);
        const CHRPOS overlapEnd = min((*hit)->getEndPos(), queryEnd);
        const CHRPOS overlapLen = overlapEnd - overlapStart;
        _qsizes.push_back(static_cast<int>(overlapLen));

        if (!_context->getObeySplits()) {
            totalBases += static_cast<unsigned long>(overlapLen);
        } else {
            totalBases += upCast(_context)->getSplitBlockInfo()->getOverlapBases(splitIdx);
            splitIdx++;
        }
        hit = recList.next();
    }

    _numIntersections += static_cast<int>(recList.size());
    return totalBases;
}
コード例 #2
0
/**
 * Writes a one-line summary for the key record of keyList, depending on the
 * reporting mode of the intersect context:
 *  - "any hit" mode with at least one overlap: the key record only;
 *  - otherwise "write count" mode: the key record, a tab, and the overlap count;
 *  - otherwise "no hit" mode with zero overlaps: the key record only.
 * In every other situation nothing is written.
 *
 * @param keyList key record plus the overlaps found for it
 */
void RecordOutputMgr::reportOverlapSummary(RecordKeyVector &keyList)
{
    ContextIntersect *ctx = static_cast<ContextIntersect *>(_context);
    int hitCount = (int)keyList.size();

    // Decide first whether anything is reported and whether the count is
    // appended; the predicates are queried in the same order as the modes
    // listed above, stopping at the first one that applies.
    bool appendCount = false;
    bool report = ctx->getAnyHit() && hitCount > 0;
    if (!report) {
        appendCount = ctx->getWriteCount();
        report = appendCount || (ctx->getNoHit() && hitCount == 0);
    }
    if (!report) {
        return;
    }

    // A true return from printKeyAndTerminate means the line is already done.
    if (printKeyAndTerminate(keyList)) {
        return;
    }
    if (appendCount) {
        tab();
        int2str(hitCount, _outBuf, true);
    }
    newline();
    if (needsFlush()) {
        flush();
    }
}
コード例 #3
0
ファイル: BlockMgr.cpp プロジェクト: Debian/bedtools2
/**
 * Collects into resultList the hits whose blocks overlap the blocks of the
 * key record by a sufficient fraction.
 *
 * For each record in hitList the overlap between every hit block and every
 * key block is summed. A hit with at least one positive block overlap is
 * accepted when (summed overlap / total key block length) >= _overlapFraction
 * and, if _hasReciprocal is set, (summed overlap / total hit block length)
 * >= _overlapFraction as well. For every accepted hit the summed overlap is
 * appended to _overlapBases (which is cleared first).
 *
 * @param keyList    blocks of the key record; filled via getBlocks when empty
 * @param hitList    records to test against the key blocks
 * @param resultList receives the accepted hits and the key of keyList
 * @return number of records in resultList
 */
int BlockMgr::findBlockedOverlaps(RecordKeyVector &keyList, RecordKeyVector &hitList, RecordKeyVector &resultList)
{
	bool freeKeyBlocks = false;
	if (keyList.empty()) {
		// No blocks supplied by the caller: build them for the key record.
		getBlocks(keyList, freeKeyBlocks);
	}
	_overlapBases.clear();
	const int keyBlocksLen = getTotalBlockLength(keyList);

	// Visit every record the key intersected with.
	for (RecordKeyVector::const_iterator_type hitIt = hitList.begin(); hitIt != hitList.end(); hitIt = hitList.next()) {
		RecordKeyVector hitBlocks(*hitIt);
		bool freeHitBlocks = false;
		getBlocks(hitBlocks, freeHitBlocks);
		const int hitBlocksLen = getTotalBlockLength(hitBlocks);

		int sharedBases = 0;
		bool anyOverlap = false;

		// Compare each block of the hit with each block of the key.
		for (RecordKeyVector::const_iterator_type hitBlockIt = hitBlocks.begin(); hitBlockIt != hitBlocks.end(); hitBlockIt = hitBlocks.next()) {
			const Record *hitBlock = *hitBlockIt;
			for (RecordKeyVector::const_iterator_type keyBlockIt = keyList.begin(); keyBlockIt != keyList.end(); keyBlockIt = keyList.next()) {
				const Record *keyBlock = *keyBlockIt;

				int overlapStart = max(keyBlock->getStartPos(), hitBlock->getStartPos());
				int overlapEnd = min(keyBlock->getEndPos(), hitBlock->getEndPos());
				int overlapLen = overlapEnd - overlapStart;
				if (overlapLen > 0) {
					anyOverlap = true;
					sharedBases += overlapLen;
				}
			}
		}

		if (anyOverlap) {
			const bool keyCovered = static_cast<float>(sharedBases) / static_cast<float>(keyBlocksLen) >= _overlapFraction;
			// The hit-side fraction is only evaluated when reciprocity is required.
			if (keyCovered &&
					(!_hasReciprocal ||
					 static_cast<float>(sharedBases) / static_cast<float>(hitBlocksLen) >= _overlapFraction)) {
				_overlapBases.push_back(sharedBases);
				resultList.push_back(*hitIt);
			}
		}

		if (freeHitBlocks) {
			deleteBlocks(hitBlocks);
		}
	}

	if (freeKeyBlocks) {
		deleteBlocks(keyList);
	}
	resultList.setKey(keyList.getKey());
	return (int)resultList.size();
}
コード例 #4
0
ファイル: coverageFile.cpp プロジェクト: ckottcam/bedtools2
/**
 * Builds the default coverage summary for the key record of hits and passes
 * it to the output manager.
 *
 * _finalOutput is filled with, tab-separated: the number of hits, the number
 * of query bases not at depth 0, the query length, and then the covered
 * fraction, which is handed to format().
 *
 * @param outputMgr destination for the finished record
 * @param hits      key record plus its hits
 */
void CoverageFile::doDefault(RecordOutputMgr *outputMgr, RecordKeyVector &hits)
{
	// Bases with any coverage = query length minus the bases at depth zero.
	size_t basesCovered = _queryLen - countBasesAtDepth(0);
	float fractionCovered = static_cast<float>(basesCovered) / static_cast<float>(_queryLen);

	_finalOutput = hits.size();
	_finalOutput.append("\t");
	_finalOutput.append(basesCovered);
	_finalOutput.append("\t");
	_finalOutput.append(_queryLen);
	_finalOutput.append("\t");
	format(fractionCovered);

	outputMgr->printRecord(hits.getKey(), _finalOutput);
}
コード例 #5
0
ファイル: Fisher.cpp プロジェクト: daler/bedtools2
/**
 * Adds up the bases shared between the key record of recList and each hit,
 * and increases _numIntersections by the number of hits.
 *
 * @param recList key record together with the hits to measure against it
 * @return total shared bases; read from _blockMgr per hit when the context
 *         obeys splits, otherwise the sum of the clipped interval lengths
 */
unsigned long Fisher::getTotalIntersection(RecordKeyVector &recList)
{
    const Record *query = recList.getKey();
    int queryStart = query->getStartPos();
    int queryEnd = query->getEndPos();

    unsigned long totalBases = 0;
    int splitIdx = 0; // index of the current hit in the block manager's overlap list
    RecordKeyVector::const_iterator_type hit = recList.begin();
    while (hit != recList.end()) {
        // Clip the hit to the extent of the query record.
        int overlapStart = max((*hit)->getStartPos(), queryStart);
        int overlapEnd = min((*hit)->getEndPos(), queryEnd);

        if (!_context->getObeySplits()) {
            totalBases += static_cast<unsigned long>(overlapEnd - overlapStart);
        } else {
            totalBases += _blockMgr->getOverlapBases(splitIdx);
            splitIdx++;
        }
        hit = recList.next();
    }

    _numIntersections += static_cast<int>(recList.size());
    return totalBases;
}
コード例 #6
0
ファイル: jaccard.cpp プロジェクト: arq5x/bedtools2
/**
 * Adds up the bases shared between the key record of hits and each hit, and
 * increases _numIntersections by the number of hits.
 *
 * @param hits key record together with the records overlapping it
 * @return total shared bases; read from the split-block info when the context
 *         obeys splits, otherwise the sum of the clipped interval lengths
 */
unsigned long Jaccard::getTotalIntersection(RecordKeyVector &hits)
{
	Record *query = hits.getKey();
	const CHRPOS queryStart = query->getStartPos();
	const CHRPOS queryEnd = query->getEndPos();

	unsigned long sharedBases = 0;
	int splitIdx = 0; // index of the current hit in the split-block overlap list
	RecordKeyVector::iterator_type it = hits.begin();
	while (it != hits.end()) {
		Record *hitRec = *it;
		// Clip the hit to the extent of the query record.
		const CHRPOS clippedStart = max(hitRec->getStartPos(), queryStart);
		const CHRPOS clippedEnd = min(hitRec->getEndPos(), queryEnd);

		if (!_context->getObeySplits()) {
			sharedBases += static_cast<unsigned long>(clippedEnd - clippedStart);
		} else {
			sharedBases += upCast(_context)->getSplitBlockInfo()->getOverlapBases(splitIdx);
			splitIdx++;
		}
		it = hits.next();
	}

	_numIntersections += static_cast<int>(hits.size());
	return sharedBases;
}
コード例 #7
0
ファイル: coverageFile.cpp プロジェクト: ckottcam/bedtools2
/**
 * Reports only the number of hits for the key record.
 *
 * @param outputMgr destination for the finished record
 * @param hits      key record plus its hits
 */
void CoverageFile::doCounts(RecordOutputMgr *outputMgr, RecordKeyVector &hits)
{
	// The output value is just the hit count.
	_finalOutput = hits.size();
	// Hand the key record and the count to the output manager.
	outputMgr->printRecord(hits.getKey(), _finalOutput);
}
コード例 #8
0
ファイル: BlockMgr.cpp プロジェクト: LukeGoodsell/bedtools2
/**
 * Filters hitList in place according to how its records' blocks overlap the
 * blocks of the key record.
 *
 * For every hit, the overlap between each block of the hit and each block of
 * the key is summed. What happens next depends on useOverlappingSubBlocks:
 *  - true:  each positive block-vs-block overlap is recorded on the hit
 *           record (block_starts / block_ends) and the hit is always kept;
 *           nothing is appended to _overlapBases.
 *  - false: the hit is kept, and its summed overlap appended to
 *           _overlapBases, only if (overlap / key block length) >=
 *           _overlapFraction and, when _hasReciprocal is set, (overlap / hit
 *           block length) >= _overlapFraction too. Otherwise the hit is
 *           removed with hitList.erase().
 *
 * @param hitList                 key record plus candidate hits; modified in place
 * @param useOverlappingSubBlocks selects between the two modes described above
 * @return number of records remaining in hitList
 */
int BlockMgr::findBlockedOverlaps(RecordKeyVector &hitList, bool useOverlappingSubBlocks)
{
	// Build the block list of the key record.
	RecordKeyVector keyList(hitList.getKey());
	// presumably getBlocks may update this flag (passed by reference?) — confirm
	bool deleteKeyBlocks = true;
	getBlocks(keyList, deleteKeyBlocks);
	
	_overlapBases.clear();
	int keyBlocksSumLength = getTotalBlockLength(keyList);

	//Loop through every database record the query intersected with.
	//The loop header has no increment: each path through the body either
	//advances with hitList.next() or removes the current hit with
	//hitList.erase().
	RecordKeyVector::iterator_type hitListIter = hitList.begin();
	for (; hitListIter != hitList.end();) 
	{
		RecordKeyVector hitBlocks(*hitListIter);
		bool deleteHitBlocks = false;
		getBlocks(hitBlocks, deleteHitBlocks); //get all blocks for the hit record.
		int hitBlockSumLength = getTotalBlockLength(hitBlocks); //get total length of the blocks for the hit record.
		int totalHitOverlap = 0;
		bool hitHasOverlap = false;

		//loop through every block of the database record.
		RecordKeyVector::iterator_type hitBlockIter = hitBlocks.begin();
		for (; hitBlockIter != hitBlocks.end(); hitBlockIter = hitBlocks.next()) 
		{
			//loop through every block of the query record.
			RecordKeyVector::iterator_type keyListIter = keyList.begin();
			for (; keyListIter != keyList.end(); keyListIter = keyList.next()) 
			{
				const Record *keyBlock = *keyListIter;
				const Record *hitBlock = *hitBlockIter;
				//overlap of the two blocks; zero or negative means they do not overlap.
				int maxStart = max(keyBlock->getStartPos(), hitBlock->getStartPos());
				int minEnd   = min(keyBlock->getEndPos(), hitBlock->getEndPos());
				int overlap  = minEnd - maxStart;
				if (overlap > 0) 
				{
					hitHasOverlap = true;
					totalHitOverlap += overlap;
					if (useOverlappingSubBlocks == true)
					{
						//remember the overlapping sub-interval on the hit record itself.
						(*hitListIter)->block_starts.push_back(maxStart);
						(*hitListIter)->block_ends.push_back(minEnd);
					}
				}
			}
		}
		if (hitHasOverlap && useOverlappingSubBlocks == false) 
		{
			//fraction of the key blocks, and of the hit blocks, covered by the overlap.
			bool enoughKeyOverlap = (float) totalHitOverlap / (float) keyBlocksSumLength >= _overlapFraction;
			bool enoughHitOverlap = (float) totalHitOverlap / (float) hitBlockSumLength  >= _overlapFraction;

			if (enoughKeyOverlap) 
			{
				if (_hasReciprocal && enoughHitOverlap)
				{
					//reciprocal requirement met: keep the hit.
					//(*hitListIter)->setValid(true);
					_overlapBases.push_back(totalHitOverlap);
					hitListIter = hitList.next();
				} 
				else if (_hasReciprocal && !enoughHitOverlap)
				{
					//reciprocal requirement failed: drop the hit.
					//NOTE(review): hitListIter is not reassigned after erase();
					//this relies on it still referring to the following
					//element afterwards — confirm against RecordKeyVector::erase.
					hitList.erase();
					//(*hitListIter)->setValid(false);
				} 
				else if (!_hasReciprocal) 
				{
					//no reciprocal requirement: keep the hit.
					//(*hitListIter)->setValid(true);
					_overlapBases.push_back(totalHitOverlap);
					hitListIter = hitList.next();
				}
			}
			else 
			{
				//not enough of the key is covered: drop the hit (same erase() caveat as above).
				hitList.erase();
				//(*hitListIter)->setValid(false);
			}
		}
		else if (!hitHasOverlap && useOverlappingSubBlocks == false) 
		{
			//no block of the hit overlaps any block of the key: drop the hit.
			hitList.erase();
			//(*hitListIter)->setValid(false);
		}
		else {
			//useOverlappingSubBlocks is true: the hit is always kept.
			hitListIter = hitList.next();
		}
		if (deleteHitBlocks)
		{
			deleteBlocks(hitBlocks);
		}
	} // end for loop through main hits
	if (deleteKeyBlocks) 
	{
		deleteBlocks(keyList);
	}
	return (int)hitList.size();
}
コード例 #9
0
ファイル: CloseSweep.cpp プロジェクト: nkindlon/bedtools2
/**
 * Re-selects the hits in retList across all databases.
 *
 * Returns immediately unless the multi-db mode is ALL_DBS and _numDBs is not 1.
 * Otherwise the hits and their entries in _finalDistances are copied into a
 * vector of distance tuples (absolute distance, record, sign flag), sorted
 * with DistanceTupleSortAscFunctor, and retList / _finalDistances are cleared
 * and rebuilt through addSingleRec while hitsUsed < _kClosest. Groups of hits
 * with the same absolute distance are handled according to _allTies and
 * _firstTie.
 *
 * NOTE(review): addSingleRec is presumably what appends to retList and
 * _finalDistances and increments hitsUsed — confirm, it is not visible here.
 *
 * @param retList hits to re-select; rewritten in place
 */
void CloseSweep::checkMultiDbs(RecordKeyVector &retList) {
//	//can skip this method if there's only one DB, or if we are
//	//resolving closest hits for each db instead of all of them
    if (_context->getMultiDbMode() != ContextClosest::ALL_DBS ||  _numDBs == 1) return;


    // Get the K closest hits among multiple databases,
    // while not counting ties more than once if the tieMode
    // is "first" or "last".
    // Start by entering  all hits and their absolute distances
    // into a vector of distance tuples, then sort it.

    vector<distanceTuple> copyDists;
    int numHits = (int)retList.size();
    copyDists.resize(numHits);
    int i=0;
    // The i-th hit in retList is paired with the i-th entry of _finalDistances.
    for (RecordKeyVector::iterator_type iter = retList.begin(); iter != retList.end(); iter++) {
        int dist = _finalDistances[i];
        copyDists[i]._dist = abs(dist);
        copyDists[i]._rec = *iter;
        copyDists[i]._isNeg = dist < 0; // remember the sign so it can be restored later
        i++;
    }

    // sort the hits by distance
    sort(copyDists.begin(), copyDists.end(), DistanceTupleSortAscFunctor());

    //now we want to build a map telling us what distances are tied,
    //and how many of each of these there are. Use a map<int, int>,
    //where the key is a distance (in absolute value) and the value
    //is the number of ties that that distance has.
    map<int, int> ties;
    // Note: this iterator named i shadows the int i declared above for the
    // duration of this loop only.
    for (vector<distanceTuple>::iterator i = copyDists.begin(); i != copyDists.end(); ++i)
        ++ties[i->_dist];

    // Clear the original list and distances, and re-populate
    // until we have the desired number of hits, skipping
    // over any unwanted ties.
    retList.clearVector();
    _finalDistances.clear();

    int hitsUsed = 0;
    // The loop index is also advanced inside the body to jump over a whole
    // group of tied hits at once.
    for (i=0; i < numHits && hitsUsed < _kClosest; i++) {
        int dist = copyDists[i]._dist;
        bool isNeg = copyDists[i]._isNeg;
        //see if this distance is tied with any other
        map<int, int>::iterator iter = ties.find(dist);
        if (iter != ties.end()) {
            //an entry exists for this distance; numTies is the size of its
            //group (1 when the distance is unique)
            int numTies = iter->second;
            if (!_allTies) {
                if (_firstTie) {
                    //just add the first of the ties
                    addSingleRec(copyDists[i]._rec, (isNeg ? 0 - dist : dist), hitsUsed, retList);
                    i += numTies - 1; // use first, then skip ahead by the number of ties, minus 1 because
                    //loop is about to be incremented
                } else { //tieMode == LAST_TIE. Just add the last of the ties.
                    i += numTies -1;
                    dist = copyDists[i]._dist;
                    isNeg = copyDists[i]._isNeg;
                    addSingleRec(copyDists[i]._rec, (isNeg ? 0 - dist : dist), hitsUsed, retList);
                }
            } else {
                // tieMode is ALL_TIES, use all hits.
                // hitsUsed is not checked against _kClosest inside this
                // inner loop, so the whole group is passed to addSingleRec.
                for (int j = i; j < i + numTies; j++) {
                    dist = copyDists[j]._dist;
                    isNeg = copyDists[j]._isNeg;
                    addSingleRec(copyDists[j]._rec, (isNeg ? 0 - dist : dist), hitsUsed, retList);
                }
                i += numTies - 1; //skip ahead by the number of ties, minus 1 because
                //loop is about to be incremented
            }
        } else {
            // NOTE(review): every distance in copyDists was counted in ties
            // above, so this branch looks unreachable — confirm before removing.
            addSingleRec(copyDists[i]._rec, (isNeg ? 0 - dist : dist), hitsUsed, retList);
        }
    }
}