Exemplo n.º 1
0
// Sums the bases shared between the key (query) record of `hits` and every
// hit record in it, and adds the number of hits to _numIntersections.
//
// When the context obeys splits, the per-hit overlap is read from the split
// block info by hit index; otherwise it is the length of the clipped
// interval [max(starts), min(ends)).
unsigned long Jaccard::getTotalIntersection(RecordKeyVector &hits)
{
	Record *query = hits.getKey();
	CHRPOS queryStart = query->getStartPos();
	CHRPOS queryEnd = query->getEndPos();

	unsigned long totalBases = 0;
	int splitIdx = 0;

	RecordKeyVector::iterator_type it = hits.begin();
	while (it != hits.end()) {
		Record *hit = *it;
		CHRPOS overlapStart = max(hit->getStartPos(), queryStart);
		CHRPOS overlapEnd = min(hit->getEndPos(), queryEnd);

		if (!_context->getObeySplits()) {
			totalBases += (unsigned long)(overlapEnd - overlapStart);
		} else {
			// split-aware overlap is looked up by the position of the hit in the list
			totalBases += upCast(_context)->getSplitBlockInfo()->getOverlapBases(splitIdx);
			splitIdx++;
		}
		it = hits.next();
	}

	_numIntersections += (int)hits.size();
	return totalBases;
}
Exemplo n.º 2
0
// Records the query interval of `hits` (_queryOffset, _queryLen), adds its
// length to _totalQueryLen, and increments _depthArray once per hit for every
// query base that the hit covers. Index 0 of _depthArray corresponds to the
// query start position.
void CoverageFile::makeDepthCount(RecordKeyVector &hits) {
	const Record *query = hits.getKey();
	_queryOffset = query->getStartPos();
	_queryLen = (size_t)(query->getEndPos() - _queryOffset);
	_totalQueryLen += _queryLen;

	// Grow the depth array when this query is longer than any seen so far.
	// The whole array is zeroed after growing.
	// NOTE(review): the realloc result is not checked; on failure the old
	// buffer is lost and the memset below dereferences NULL.
	// NOTE(review): the array is only zeroed here when it grows — presumably
	// it is reset elsewhere between queries; confirm against the callers.
	if (_queryLen > _depthArrayCapacity) {
		_depthArray = (size_t*)realloc(_depthArray, sizeof(size_t) * _queryLen);
		_depthArrayCapacity = _queryLen;
		memset(_depthArray, 0, sizeof(size_t) * _depthArrayCapacity);
	}

	// Hits may come from several databases and need not be sorted, so each
	// one is clipped to the query and counted independently.
	RecordKeyVector::const_iterator_type it = hits.begin();
	while (it != hits.end()) {
		const Record *hit = *it;
		int hitStart = hit->getStartPos();
		int hitEnd = hit->getEndPos();
		int clipStart = max(_queryOffset, hitStart);
		int clipEnd = min(hitEnd, query->getEndPos());

		for (int pos = clipStart; pos < clipEnd; ++pos) {
			_depthArray[pos - _queryOffset] += 1;
		}
		it = hits.next();
	}
}
Exemplo n.º 3
0
// Filters hitList in place according to block-level overlap with the key
// record of hitList, and returns the number of hits left in hitList.
//
// For every hit, the overlap between each block of the hit and each block of
// the key is computed and summed into totalHitOverlap.
//
//  - useOverlappingSubBlocks == true: every overlapping (maxStart, minEnd)
//    pair is appended to the hit's block_starts / block_ends, the hit is
//    always kept, and nothing is added to _overlapBases.
//  - useOverlappingSubBlocks == false: a hit with no block overlap is erased.
//    A hit with overlap is kept only if totalHitOverlap / (total key block
//    length) >= _overlapFraction and, when _hasReciprocal is set, also
//    totalHitOverlap / (total hit block length) >= _overlapFraction. For each
//    kept hit, totalHitOverlap is appended to _overlapBases.
//
// _overlapBases is cleared on entry.
int BlockMgr::findBlockedOverlaps(RecordKeyVector &hitList, bool useOverlappingSubBlocks)
{
	// Expand the key record into its blocks. deleteKeyBlocks is passed to
	// getBlocks and checked again at the end; presumably getBlocks updates it
	// to say whether the blocks must be freed — confirm against getBlocks.
	RecordKeyVector keyList(hitList.getKey());
	bool deleteKeyBlocks = true;
	getBlocks(keyList, deleteKeyBlocks);
	
	_overlapBases.clear();
	int keyBlocksSumLength = getTotalBlockLength(keyList);

	//Loop through every database record the query intersected with
	// The loop header has no increment: the iterator is advanced with next()
	// only when the current hit is kept. After erase() the iterator is not
	// reassigned.
	// NOTE(review): this relies on RecordKeyVector::erase() removing the
	// current element and leaving hitListIter positioned on the element that
	// followed it — confirm against RecordKeyVector.
	RecordKeyVector::iterator_type hitListIter = hitList.begin();
	for (; hitListIter != hitList.end();) 
	{
		RecordKeyVector hitBlocks(*hitListIter);
		bool deleteHitBlocks = false;
		getBlocks(hitBlocks, deleteHitBlocks); //get all blocks for the hit record.
		int hitBlockSumLength = getTotalBlockLength(hitBlocks); //get total length of the blocks for the hitRecord.
		int totalHitOverlap = 0;
		bool hitHasOverlap = false;

		//loop through every block of the database record.
		RecordKeyVector::iterator_type hitBlockIter = hitBlocks.begin();
		for (; hitBlockIter != hitBlocks.end(); hitBlockIter = hitBlocks.next()) 
		{
			//loop through every block of the query record.
			RecordKeyVector::iterator_type keyListIter = keyList.begin();
			for (; keyListIter != keyList.end(); keyListIter = keyList.next()) 
			{
				const Record *keyBlock = *keyListIter;
				const Record *hitBlock = *hitBlockIter;
				// overlap is the length of the clipped interval; <= 0 means the
				// two blocks do not share any base.
				int maxStart = max(keyBlock->getStartPos(), hitBlock->getStartPos());
				int minEnd   = min(keyBlock->getEndPos(), hitBlock->getEndPos());
				int overlap  = minEnd - maxStart;
				if (overlap > 0) 
				{
					hitHasOverlap = true;
					totalHitOverlap += overlap;
					if (useOverlappingSubBlocks == true)
					{
						// record the overlapping sub-block on the hit itself
						(*hitListIter)->block_starts.push_back(maxStart);
						(*hitListIter)->block_ends.push_back(minEnd);
					}
				}
			}
		}
		if (hitHasOverlap && useOverlappingSubBlocks == false) 
		{
			// fraction of the key's blocks, and of the hit's blocks, covered by the overlap
			bool enoughKeyOverlap = (float) totalHitOverlap / (float) keyBlocksSumLength >= _overlapFraction;
			bool enoughHitOverlap = (float) totalHitOverlap / (float) hitBlockSumLength  >= _overlapFraction;

			if (enoughKeyOverlap) 
			{
				if (_hasReciprocal && enoughHitOverlap)
				{
					// reciprocal requirement met: keep the hit
					//(*hitListIter)->setValid(true);
					_overlapBases.push_back(totalHitOverlap);
					hitListIter = hitList.next();
				} 
				else if (_hasReciprocal && !enoughHitOverlap)
				{
					// reciprocal requirement not met: drop the hit
					hitList.erase();
					//(*hitListIter)->setValid(false);
				} 
				else if (!_hasReciprocal) 
				{
					// no reciprocal requirement: key-side fraction is sufficient
					//(*hitListIter)->setValid(true);
					_overlapBases.push_back(totalHitOverlap);
					hitListIter = hitList.next();
				}
			}
			else 
			{
				// not enough of the key is covered: drop the hit
				hitList.erase();
				//(*hitListIter)->setValid(false);
			}
		}
		else if (!hitHasOverlap && useOverlappingSubBlocks == false) 
		{
			// no block of the hit overlaps any block of the key: drop the hit
			hitList.erase();
			//(*hitListIter)->setValid(false);
		}
		else {
			// useOverlappingSubBlocks == true: the hit is always kept
			hitListIter = hitList.next();
		}
		if (deleteHitBlocks)
		{
			deleteBlocks(hitBlocks);
		}
	} // end for loop through main hits
	if (deleteKeyBlocks) 
	{
		deleteBlocks(keyList);
	}
	return (int)hitList.size();
}
Exemplo n.º 4
0
// Passes every record held in keyList to _blockRecordsMgr->deleteRecord and
// then empties the vector with clearVector().
void BlockMgr::deleteBlocks(RecordKeyVector &keyList)
{
	RecordKeyVector::iterator_type blockIt = keyList.begin();
	while (blockIt != keyList.end()) {
		_blockRecordsMgr->deleteRecord(*blockIt);
		blockIt = keyList.next();
	}
	keyList.clearVector();
}
Exemplo n.º 5
0
// Returns the sum of (end - start) over every record held in keyList.
int BlockMgr::getTotalBlockLength(RecordKeyVector &keyList) {
	int totalLength = 0;
	RecordKeyVector::iterator_type blockIt = keyList.begin();
	while (blockIt != keyList.end()) {
		const Record *block = *blockIt;
		totalLength += block->getEndPos() - block->getStartPos();
		blockIt = keyList.next();
	}
	return totalLength;
}
Exemplo n.º 6
0
// Calls deleteRecord on every record in recList except the one that is also
// the list's key, then empties the vector with clearVector().
void FileRecordMergeMgr::deleteAllMergedItemsButKey(RecordKeyVector &recList) {
	RecordKeyVector::const_iterator_type recIt = recList.begin();
	while (recIt != recList.end()) {
		// the key may appear in its own list; it is skipped here
		if (*recIt != recList.getKey()) {
			deleteRecord(*recIt);
		}
		recIt = recList.next();
	}
	recList.clearVector();
}
Exemplo n.º 7
0
// Copies into resultList every record of hitList whose blocks overlap the
// blocks of the key sufficiently, and returns the size of resultList.
//
// A hit qualifies when at least one of its blocks overlaps a key block,
// the summed overlap divided by the total key block length is at least
// _overlapFraction, and, when _hasReciprocal is set, the summed overlap
// divided by the total hit block length is also at least _overlapFraction.
// For each qualifying hit the summed overlap is appended to _overlapBases,
// which is cleared on entry.
//
// keyList:     blocks of the key; filled via getBlocks() when passed in empty.
// overlapList: optional; when non-NULL, one record per overlapping block pair
//              (created with allocateAndAssignRecord from the key and the
//              clipped coordinates) is appended, whether or not the hit
//              ends up qualifying.
// resultList's key is set to the key of keyList before returning.
int BlockMgr::findBlockedOverlaps(RecordKeyVector &keyList, RecordKeyVector &hitList,
	                              RecordKeyVector &resultList, RecordKeyVector *overlapList)
{
	bool mustFreeKeyBlocks = false;
	if (keyList.empty()) {
		// caller did not supply the key's blocks, so build them here
		getBlocks(keyList, mustFreeKeyBlocks);
	}
	_overlapBases.clear();
	int keyBlocksTotal = getTotalBlockLength(keyList);

	// Visit every database record the query intersected with.
	RecordKeyVector::iterator_type hitIt = hitList.begin();
	while (hitIt != hitList.end()) {
		RecordKeyVector hitBlocks(*hitIt);
		bool mustFreeHitBlocks = false;
		getBlocks(hitBlocks, mustFreeHitBlocks);
		int hitBlocksTotal = getTotalBlockLength(hitBlocks);
		int sharedBases = 0;
		bool anyOverlap = false;

		// Compare every block of the hit against every block of the key.
		for (RecordKeyVector::iterator_type hb = hitBlocks.begin(); hb != hitBlocks.end(); hb = hitBlocks.next()) {
			const Record *hitBlock = *hb;
			for (RecordKeyVector::iterator_type kb = keyList.begin(); kb != keyList.end(); kb = keyList.next()) {
				const Record *keyBlock = *kb;

				int maxStart = max(keyBlock->getStartPos(), hitBlock->getStartPos());
				int minEnd = min(keyBlock->getEndPos(), hitBlock->getEndPos());
				int overlap = minEnd - maxStart;
				if (overlap <= 0) {
					continue; // these two blocks share no base
				}
				anyOverlap = true;
				if (overlapList != NULL) {
					overlapList->push_back(allocateAndAssignRecord(keyList.getKey(), maxStart, minEnd));
				}
				sharedBases += overlap;
			}
		}

		// Key-side fraction is always required; hit-side only when reciprocal.
		bool keepHit = anyOverlap
				&& ((float) sharedBases / (float) keyBlocksTotal >= _overlapFraction)
				&& (!_hasReciprocal
						|| ((float) sharedBases / (float) hitBlocksTotal >= _overlapFraction));
		if (keepHit) {
			_overlapBases.push_back(sharedBases);
			resultList.push_back(*hitIt);
		}

		if (mustFreeHitBlocks) {
			deleteBlocks(hitBlocks);
		}
		hitIt = hitList.next();
	}

	if (mustFreeKeyBlocks) {
		deleteBlocks(keyList);
	}
	resultList.setKey(keyList.getKey());
	return (int)resultList.size();
}
Exemplo n.º 8
0
// Writes the output for one key record and its hits, in the form required by
// the program the context reports (INTERSECT, SAMPLE, MAP or MERGE). For any
// other program nothing further is written.
//
// keyList:   the key record plus the hits to report for it.
// blockList: stored in _currBamBlockList for the duration of the call; the
//            member is reset to NULL before returning on every INTERSECT,
//            SAMPLE, MAP and MERGE path.
void RecordOutputMgr::printRecord(RecordKeyVector &keyList, RecordKeyVector *blockList)
{
    if (needsFlush()) {
        flush();
    }
    // The header is emitted together with the first record, because it has
    // not been read from the query file until the first record has been read.
    checkForHeader();

    const_cast<Record *>(keyList.getKey())->undoZeroLength();
    _currBamBlockList = blockList;

    if (_context->getProgram() == ContextBase::INTERSECT) {
        if (!_printable) {
            reportOverlapSummary(keyList);
        } else if (!keyList.empty()) {
            // BAM-as-BAM output is complete once the BAM record is printed;
            // otherwise report each hit in detail.
            if (printBamRecord(keyList, true) != BAM_AS_BAM) {
                int hitIdx = 0;
                RecordKeyVector::const_iterator_type hitIt = keyList.begin();
                while (hitIt != keyList.end()) {
                    reportOverlapDetail(keyList.getKey(), *hitIt, hitIdx);
                    hitIdx++;
                    hitIt = keyList.next();
                }
            }
        } else if ((static_cast<ContextIntersect *>(_context))->getWriteAllOverlap()) {
            // -wao: the user wants 0-overlap keys reported, followed by a
            // null record and an overlap count of 0.
            if (!printKeyAndTerminate(keyList)) {
                tab();
                null(false, true);
                tab();
                _outBuf.append('0');
                newline();
                if (needsFlush()) flush();
            }
        } else if ((static_cast<ContextIntersect *>(_context))->getLeftJoin()) {
            // left join: key followed by a null record
            if (!printKeyAndTerminate(keyList)) {
                tab();
                null(false, true);
                newline();
                if (needsFlush()) flush();
            }
        }
        _currBamBlockList = NULL;
        return;
    }

    if (_context->getProgram() == ContextBase::SAMPLE) {
        if (!printKeyAndTerminate(keyList)) {
            newline();
        }
        _currBamBlockList = NULL;
        return;
    }

    if (_context->getProgram() == ContextBase::MAP
            || _context->getProgram() == ContextBase::MERGE) {
        printKeyAndTerminate(keyList);
        _currBamBlockList = NULL;
        return;
    }
}
Exemplo n.º 9
0
// Writes the "closest" output for one key record.
//
// With hits: one line per hit, consisting of the key, a tab, the output of
// addDbFileId() for the hit's file index, the hit record, and, when dists is
// non-NULL, a tab and the distance stored at the same position as the hit.
// Without hits: the key, a tab, a null record and, when the context reports
// distances, a tab and -1.
//
// keyList: key record plus the hits to print.
// dists:   optional distances, indexed in the same order as the hits.
//
// For BAM keys the key's blocks are fetched into a local list and published
// through _currBamBlockList while printing.
void RecordOutputMgr::printClosest(RecordKeyVector &keyList, const vector<int> *dists) {
    const ContextClosest *context = static_cast<const ContextClosest *>(_context);
    bool deleteBlocks = false;
    bool publishedBlockList = false;
    RecordKeyVector blockList(keyList.getKey());
    if (keyList.getKey()->getType() == FileRecordTypeChecker::BAM_RECORD_TYPE) {
        _bamBlockMgr->getBlocks(blockList, deleteBlocks);
        _currBamBlockList = &blockList;
        publishedBlockList = true;
    }
    if (!keyList.empty()) {
        int distCount = 0;
        for (RecordKeyVector::const_iterator_type iter = keyList.begin(); iter != keyList.end(); iter = keyList.next()) {
            printKey(keyList.getKey());
            tab();
            addDbFileId((*iter)->getFileIdx());
            (*iter)->print(_outBuf);
            if (dists != NULL) {
                tab();
                _outBuf.append((*dists)[distCount]);
                distCount++;
            }
            newline();
            if (needsFlush()) flush();
        }
    } else {
        printKey(keyList.getKey());
        tab();
        null(true, false);
        if (context->reportDistance()) {
            tab();
            _outBuf.append(-1);
        }
        newline();
    }
    if (deleteBlocks) {
        _bamBlockMgr->deleteBlocks(blockList);
    }
    // blockList is a local: never leave _currBamBlockList pointing at it
    // after this function returns, whether or not the blocks had to be freed.
    if (publishedBlockList) {
        _currBamBlockList = NULL;
    }
}
Exemplo n.º 10
0
// Writes the "closest" output for one key record.
//
// With hits: one line per hit, consisting of the key, a tab, the output of
// addDbFileId() for the hit's file index, the hit record, and, when dists is
// non-NULL, a tab and the distance stored at the same position as the hit
// (absolute value unless the context asks for signed distances).
// Without hits: the key, a tab, a null record and, when the context reports
// distances, a tab and -1.
//
// keyList: key record plus the hits to print.
// dists:   optional distances, indexed in the same order as the hits.
//
// For BAM keys the key's blocks are fetched into a local list and published
// through _currBamBlockList while printing.
void RecordOutputMgr::printClosest(RecordKeyVector &keyList, const vector<int> *dists) {

	//The first time we print a record is when we print any header, because the header
	//hasn't been read from the query file until after the first record has also been read.
	checkForHeader();

	const ContextClosest *context = static_cast<const ContextClosest *>(_context);
	bool deleteBlocks = false;
	bool publishedBlockList = false;
	const Record *keyRec = keyList.getKey();
	RecordKeyVector blockList(keyRec);
	if (keyRec->getType() == FileRecordTypeChecker::BAM_RECORD_TYPE) {
		_bamBlockMgr->getBlocks(blockList, deleteBlocks);
		_currBamBlockList = &blockList;
		publishedBlockList = true;
	}
	if (!keyList.empty()) {
		int distCount = 0;
		for (RecordKeyVector::const_iterator_type iter = keyList.begin(); iter != keyList.end(); iter = keyList.next()) {
			const Record *hitRec = *iter;
			printKey(keyRec, keyRec->getStartPosStr(), keyRec->getEndPosStr());
			tab();
			addDbFileId(hitRec->getFileIdx());
			printKey(hitRec, hitRec->getStartPosStr(), hitRec->getEndPosStr());
			if (dists != NULL) {
				tab();
				int dist = (*dists)[distCount];
				//if not using sign distance, use absolute value instead.
				dist = context->signDistance() ? dist : abs(dist);
				_outBuf.append(dist);
				distCount++;
			}
			newline();
			if (needsFlush()) flush();
		}
	} else {
		printKey(keyRec, keyRec->getStartPosStr(), keyRec->getEndPosStr());
		tab();
		null(false, true);
		if (context->reportDistance()) {
			tab();
			_outBuf.append(-1);
		}
		newline();
	}
	if (deleteBlocks) {
		_bamBlockMgr->deleteBlocks(blockList);
	}
	// blockList is a local: never leave _currBamBlockList pointing at it
	// after this function returns, whether or not the blocks had to be freed.
	if (publishedBlockList) {
		_currBamBlockList = NULL;
	}
}
Exemplo n.º 11
0
// Replaces the contents of `hits` with what is left of the key (query)
// record after subtracting the hits from it, or sets _dontReport when the
// key should not be reported at all.
//
// Outcomes, in the order they are checked:
//  - no hits: the key itself is pushed into `hits` (reported unchanged).
//  - removeAll with a subtract fraction of 0: _dontReport is set.
//  - no hit qualified for removal: `hits` becomes just the key.
//  - removeAll: _dontReport is set.
//  - removeSum (-N): _dontReport is set when the covered fraction of the key
//    exceeds the subtract fraction; otherwise `hits` becomes just the key.
//  - otherwise: `hits` becomes one temporary record per remaining uncovered
//    stretch of the key, and _deleteTmpBlocks is set.
void SubtractFile::subtractHits(RecordKeyVector &hits) {
	if (hits.empty()) {
		// no intersection, nothing to subtract.
		// just copy key to hits as if it were a
		// self-intersection. This is just for reporting
		// purposes.
		hits.push_back(hits.getKey());
		return;
	}

	if (upCast(_context)->getRemoveAll() && upCast(_context)->getSubtractFraction() == 0.0) {
		// hits aren't empty, meaning there is intersection,
		// so we want to not report the hit.
		_dontReport = true;
		return;
	}

	//loop through hits. Track which bases in query were covered
	Record *keyRec = hits.getKey();
	int keyStart = keyRec->getStartPos();
	int keyEnd = keyRec->getEndPos();
	int keyLen = keyEnd - keyStart;

	//this vector of bools will represent the bases of the query.
	//for each base, true means uncovered, false means covered.
	//they begin as all uncovered.
	vector<bool> keyBases(keyLen, true);

	//now loop through the hits, and cover corresponding query bases
	//by setting them to false.
	bool basesRemoved = false;
	for (RecordKeyVector::iterator_type iter = hits.begin(); iter != hits.end(); iter = hits.next()) {
		Record *hitRec = *iter;
		int hitStart = hitRec->getStartPos();
		int hitEnd = hitRec->getEndPos();

		// hit clipped to the query, expressed as indexes into keyBases
		int startIdx = max(keyStart, hitStart) - keyStart;
		int endIdx = min(keyEnd, hitEnd) - keyStart;

		int coveredLen = endIdx - startIdx;
		float coveragePct = (float)coveredLen / (float)keyLen;
		//for each base in the hit, set the base in the query to false.
		//this effectively "erases" the covered bits. Only do this when
		//removeSum is set, or when this hit alone covers at least the
		//subtract fraction of the query.
		if (upCast(_context)->getRemoveSum() || coveragePct >= upCast(_context)->getSubtractFraction()) {
			std::fill(keyBases.begin() + startIdx, keyBases.begin() + endIdx, false);
			basesRemoved = true;
		}
	}

	if (!basesRemoved) {
		//treat as if there were no intersection
		hits.clearVector();
		hits.push_back(hits.getKey());
		return;
	} else if (upCast(_context)->getRemoveAll()) {
		_dontReport = true;
		return;
	}
	// if the -N option is used ( removeSum), do not report if the percentage of
	// uniquely covered bases exceeds the overlap fraction.
	if (upCast(_context)->getRemoveSum()) {
		//determine how many bases are left uncovered.
		int numBasesUncovered = std::accumulate(keyBases.begin(), keyBases.end(), 0);
		//determine percentage that are covered.
		float pctCovered = 1.0 - (float)numBasesUncovered / (float)(keyEnd - keyStart);
		if (pctCovered > upCast(_context)->getSubtractFraction()) {
			_dontReport = true;
			return;
		} else {
			hits.clearVector();
			hits.push_back(hits.getKey());
		}
		return;
	}

	//now make "blocks" out of the query's remaining stretches of
	//uncovered bases.
	hits.clearVector();
	const int numBases = (int)keyBases.size();
	for (int i = 0; i < numBases; i++) {
		if (keyBases[i]) {
			int blockStart = keyStart + i;
			// The bounds check must come before the element access: when the
			// query ends in an uncovered run, i reaches numBases here and
			// keyBases[numBases] must not be read.
			while (i < numBases && keyBases[i]) {
				i++;
			}
			int blockEnd = min(keyStart + i, keyEnd);
			hits.push_back(_tmpBlocksMgr->allocateAndAssignRecord(keyRec, blockStart, blockEnd));
		}
	}
	_deleteTmpBlocks = true;

}
Exemplo n.º 12
0
// Sums the bases shared between the key record of recList and every hit in
// it, and adds the number of hits to _numIntersections.
//
// When the context obeys splits, the per-hit overlap is read from _blockMgr
// by hit index; otherwise it is the length of the clipped interval
// [max(starts), min(ends)).
unsigned long Fisher::getTotalIntersection(RecordKeyVector &recList)
{
    const Record *query = recList.getKey();
    int queryStart = query->getStartPos();
    int queryEnd = query->getEndPos();

    unsigned long totalBases = 0;
    int splitIdx = 0;

    RecordKeyVector::const_iterator_type hitIt = recList.begin();
    while (hitIt != recList.end()) {
        int overlapStart = max((*hitIt)->getStartPos(), queryStart);
        int overlapEnd = min((*hitIt)->getEndPos(), queryEnd);

        if (!_context->getObeySplits()) {
            totalBases += (unsigned long)(overlapEnd - overlapStart);
        } else {
            // split-aware overlap is looked up by the position of the hit in the list
            totalBases += _blockMgr->getOverlapBases(splitIdx);
            splitIdx++;
        }
        hitIt = recList.next();
    }

    _numIntersections += (int)recList.size();
    return totalBases;
}
Exemplo n.º 13
0
// Sums the bases shared between the key record of recList and every hit in
// it. Also adds the number of hits to _overlapCounts and _numIntersections,
// and appends to _qsizes first the key length and then the clipped overlap
// length of each hit.
//
// When the context obeys splits, the per-hit overlap added to the returned
// total is read from the split block info by hit index; otherwise it is the
// clipped overlap length.
unsigned long Fisher::getTotalIntersection(RecordKeyVector &recList)
{
    Record *query = recList.getKey();
    CHRPOS queryStart = query->getStartPos();
    CHRPOS queryEnd = query->getEndPos();

    _overlapCounts += recList.size();
    // lengths are stored as int, so they are truncated to a max size of 2.1GB
    _qsizes.push_back((int)(queryEnd - queryStart));

    unsigned long totalBases = 0;
    int splitIdx = 0;

    RecordKeyVector::iterator_type hitIt = recList.begin();
    while (hitIt != recList.end()) {
        CHRPOS overlapStart = max((*hitIt)->getStartPos(), queryStart);
        CHRPOS overlapEnd = min((*hitIt)->getEndPos(), queryEnd);
        _qsizes.push_back((int)(overlapEnd - overlapStart));

        if (!_context->getObeySplits()) {
            totalBases += (unsigned long)(overlapEnd - overlapStart);
        } else {
            // split-aware overlap is looked up by the position of the hit in the list
            totalBases += upCast(_context)->getSplitBlockInfo()->getOverlapBases(splitIdx);
            splitIdx++;
        }
        hitIt = recList.next();
    }

    _numIntersections += (int)recList.size();
    return totalBases;
}