/**
 * Accumulate the number of bases shared between the query (the key of
 * recList) and every hit stored in recList.
 *
 * Side effects visible in this function:
 *   - _overlapCounts and _numIntersections each grow by the number of hits.
 *   - _qsizes receives the query length, followed by one clipped overlap
 *     length per hit (each narrowed to int).
 *
 * @param recList  hits for a single query record; its key is the query.
 * @return the summed overlap, in bases, across all hits.
 */
unsigned long Fisher::getTotalIntersection(RecordKeyVector &recList) {
    Record *query = recList.getKey();
    CHRPOS queryStart = query->getStartPos();
    CHRPOS queryEnd = query->getEndPos();

    _overlapCounts += recList.size();

    // Sizes are stored as int, so lengths beyond the int range (~2.1 billion)
    // are truncated.
    _qsizes.push_back(static_cast<int>(queryEnd - queryStart));

    unsigned long totalBases = 0;
    int splitIdx = 0;

    // RecordKeyVector is advanced through its own next(), so the loop must
    // keep using the iterator that next() hands back.
    RecordKeyVector::iterator_type hitIter = recList.begin();
    while (hitIter != recList.end()) {
        // Clip the hit to the extent of the query.
        CHRPOS clippedStart = max((*hitIter)->getStartPos(), queryStart);
        CHRPOS clippedEnd = min((*hitIter)->getEndPos(), queryEnd);
        _qsizes.push_back(static_cast<int>(clippedEnd - clippedStart));

        if (!_context->getObeySplits()) {
            totalBases += static_cast<unsigned long>(clippedEnd - clippedStart);
        } else {
            // With splits obeyed, the per-hit overlap comes from the split
            // block info, indexed by the hit's position in the list.
            totalBases += upCast(_context)->getSplitBlockInfo()->getOverlapBases(splitIdx);
            splitIdx++;
        }

        hitIter = recList.next();
    }

    _numIntersections += static_cast<int>(recList.size());
    return totalBases;
}
/**
 * Emit the one-line summary for a query record, according to the first
 * matching reporting mode of the intersect context:
 *   1. any-hit mode and at least one hit  -> key only
 *   2. write-count mode                   -> key, a tab, then the hit count
 *   3. no-hit mode and zero hits          -> key only
 * If none of the modes match, nothing is written.
 *
 * @param keyList  hits for one query; its size is the reported count.
 */
void RecordOutputMgr::reportOverlapSummary(RecordKeyVector &keyList) {
    int hitCount = static_cast<int>(keyList.size());
    ContextIntersect *ctx = static_cast<ContextIntersect *>(_context);

    // Decide what to print. The predicates are tested in the same order as
    // the modes listed above, and testing stops at the first match.
    bool emitKey = false;
    bool emitCount = false;
    if (ctx->getAnyHit() && hitCount > 0) {
        emitKey = true;
    } else if (ctx->getWriteCount()) {
        emitKey = true;
        emitCount = true;
    } else if (ctx->getNoHit() && hitCount == 0) {
        emitKey = true;
    }

    if (!emitKey) {
        return;
    }

    // A true return from printKeyAndTerminate means nothing more is to be
    // written for this record here.
    if (printKeyAndTerminate(keyList)) {
        return;
    }

    if (emitCount) {
        tab();
        int2str(hitCount, _outBuf, true);
    }
    newline();
    if (needsFlush()) {
        flush();
    }
}
int BlockMgr::findBlockedOverlaps(RecordKeyVector &keyList, RecordKeyVector &hitList, RecordKeyVector &resultList) { bool deleteKeyBlocks = false; if (keyList.empty()) { //get all the blocks for the query record, put them in it's list. getBlocks(keyList, deleteKeyBlocks); } _overlapBases.clear(); int keyBlocksSumLength = getTotalBlockLength(keyList); //Loop through every database record the query intersected with for (RecordKeyVector::const_iterator_type hitListIter = hitList.begin(); hitListIter != hitList.end(); hitListIter = hitList.next()) { RecordKeyVector hitBlocks(*hitListIter); bool deleteHitBlocks = false; getBlocks(hitBlocks, deleteHitBlocks); //get all blocks for the hit record. int hitBlockSumLength = getTotalBlockLength(hitBlocks); //get total length of the bocks for the hitRecord. int totalHitOverlap = 0; bool hitHasOverlap = false; //loop through every block of the database record. for (RecordKeyVector::const_iterator_type hitBlockIter = hitBlocks.begin(); hitBlockIter != hitBlocks.end(); hitBlockIter = hitBlocks.next()) { //loop through every block of the query record. 
for (RecordKeyVector::const_iterator_type keyListIter = keyList.begin(); keyListIter != keyList.end(); keyListIter = keyList.next()) { const Record *keyBlock = *keyListIter; const Record *hitBlock = *hitBlockIter; int maxStart = max(keyBlock->getStartPos(), hitBlock->getStartPos()); int minEnd = min(keyBlock->getEndPos(), hitBlock->getEndPos()); int overlap = minEnd - maxStart; if (overlap > 0) { hitHasOverlap = true; totalHitOverlap += overlap; } } } if (hitHasOverlap) { if ((float) totalHitOverlap / (float)keyBlocksSumLength >= _overlapFraction) { if (_hasReciprocal && ((float)totalHitOverlap / (float)hitBlockSumLength >= _overlapFraction)) { _overlapBases.push_back(totalHitOverlap); resultList.push_back(*hitListIter); } else if (!_hasReciprocal) { _overlapBases.push_back(totalHitOverlap); resultList.push_back(*hitListIter); } } } if (deleteHitBlocks) { deleteBlocks(hitBlocks); } } if (deleteKeyBlocks) { deleteBlocks(keyList); } resultList.setKey(keyList.getKey()); return (int)resultList.size(); }
/**
 * Build the default coverage report for one query and hand it to the
 * output manager together with the query (key) record.
 *
 * _finalOutput is assembled as tab-separated fields:
 *   hit count, bases with non-zero depth, query length, covered fraction.
 *
 * @param outputMgr  receives the key record and the assembled text.
 * @param hits       hits for the query; its key is the record reported.
 */
void CoverageFile::doDefault(RecordOutputMgr *outputMgr, RecordKeyVector &hits) {
    // Bases covered at least once = query length minus bases at depth 0.
    size_t coveredCount = _queryLen - countBasesAtDepth(0);
    float coveredFraction = static_cast<float>(coveredCount) / static_cast<float>(_queryLen);

    _finalOutput = hits.size();
    _finalOutput.append("\t");

    _finalOutput.append(coveredCount);
    _finalOutput.append("\t");

    _finalOutput.append(_queryLen);
    _finalOutput.append("\t");

    // NOTE(review): format() is assumed to append the fraction to
    // _finalOutput; its body is not visible from here - confirm.
    format(coveredFraction);

    outputMgr->printRecord(hits.getKey(), _finalOutput);
}
/**
 * Add up the bases shared between the query (the key of recList) and each
 * hit in recList, and bump _numIntersections by the number of hits.
 *
 * When the context obeys splits, the per-hit overlap is taken from
 * _blockMgr (indexed by the hit's position in the list); otherwise it is
 * the length of the hit after clipping it to the query's extent.
 *
 * @param recList  hits for a single query record; its key is the query.
 * @return the summed overlap, in bases, across all hits.
 */
unsigned long Fisher::getTotalIntersection(RecordKeyVector &recList) {
    const Record *query = recList.getKey();
    int queryStart = query->getStartPos();
    int queryEnd = query->getEndPos();

    unsigned long totalBases = 0;
    int splitIdx = 0;

    // RecordKeyVector is advanced through its own next(), so the loop must
    // keep using the iterator that next() hands back.
    RecordKeyVector::const_iterator_type hitIter = recList.begin();
    while (hitIter != recList.end()) {
        // Clip the hit to the query's extent.
        int clippedStart = max((*hitIter)->getStartPos(), queryStart);
        int clippedEnd = min((*hitIter)->getEndPos(), queryEnd);

        if (!_context->getObeySplits()) {
            totalBases += static_cast<unsigned long>(clippedEnd - clippedStart);
        } else {
            totalBases += _blockMgr->getOverlapBases(splitIdx);
            splitIdx++;
        }

        hitIter = recList.next();
    }

    _numIntersections += static_cast<int>(recList.size());
    return totalBases;
}
/**
 * Total the bases shared between the query (the key of hits) and every
 * record in hits, and add the number of hits to _numIntersections.
 *
 * When the context obeys splits, each hit contributes the overlap reported
 * by the split block info for its index; otherwise it contributes the
 * length of the hit after clipping it to the query's extent.
 *
 * @param hits  hits for a single query record; its key is the query.
 * @return the summed overlap, in bases, across all hits.
 */
unsigned long Jaccard::getTotalIntersection(RecordKeyVector &hits) {
    Record *query = hits.getKey();
    CHRPOS queryStart = query->getStartPos();
    CHRPOS queryEnd = query->getEndPos();

    unsigned long sharedTotal = 0;
    int splitIdx = 0;

    // RecordKeyVector is advanced through its own next(), so the loop must
    // keep using the iterator that next() hands back.
    RecordKeyVector::iterator_type hitIter = hits.begin();
    while (hitIter != hits.end()) {
        Record *hit = *hitIter;

        // Clip the hit to the query's extent.
        CHRPOS clippedStart = max(hit->getStartPos(), queryStart);
        CHRPOS clippedEnd = min(hit->getEndPos(), queryEnd);

        if (!_context->getObeySplits()) {
            sharedTotal += static_cast<unsigned long>(clippedEnd - clippedStart);
        } else {
            sharedTotal += upCast(_context)->getSplitBlockInfo()->getOverlapBases(splitIdx);
            splitIdx++;
        }

        hitIter = hits.next();
    }

    _numIntersections += static_cast<int>(hits.size());
    return sharedTotal;
}
// Counts-only report for one query: _finalOutput is set to the number of
// hits (hits.size()) and passed, together with the query (key) record, to
// the output manager's printRecord().
//
// outputMgr - receives the key record and the count text.
// hits      - hits for the query; its key is the record being reported.
//
// NOTE(review): how the count is rendered depends on _finalOutput's
// assignment operator, which is not visible here - confirm.
void CoverageFile::doCounts(RecordOutputMgr *outputMgr, RecordKeyVector &hits) { _finalOutput = hits.size(); outputMgr->printRecord(hits.getKey(), _finalOutput); }
int BlockMgr::findBlockedOverlaps(RecordKeyVector &hitList, bool useOverlappingSubBlocks) { RecordKeyVector keyList(hitList.getKey()); bool deleteKeyBlocks = true; getBlocks(keyList, deleteKeyBlocks); _overlapBases.clear(); int keyBlocksSumLength = getTotalBlockLength(keyList); //Loop through every database record the query intersected with RecordKeyVector::iterator_type hitListIter = hitList.begin(); for (; hitListIter != hitList.end();) { RecordKeyVector hitBlocks(*hitListIter); bool deleteHitBlocks = false; getBlocks(hitBlocks, deleteHitBlocks); //get all blocks for the hit record. int hitBlockSumLength = getTotalBlockLength(hitBlocks); //get total length of the bocks for the hitRecord. int totalHitOverlap = 0; bool hitHasOverlap = false; //loop through every block of the database record. RecordKeyVector::iterator_type hitBlockIter = hitBlocks.begin(); for (; hitBlockIter != hitBlocks.end(); hitBlockIter = hitBlocks.next()) { //loop through every block of the query record. RecordKeyVector::iterator_type keyListIter = keyList.begin(); for (; keyListIter != keyList.end(); keyListIter = keyList.next()) { const Record *keyBlock = *keyListIter; const Record *hitBlock = *hitBlockIter; int maxStart = max(keyBlock->getStartPos(), hitBlock->getStartPos()); int minEnd = min(keyBlock->getEndPos(), hitBlock->getEndPos()); int overlap = minEnd - maxStart; if (overlap > 0) { hitHasOverlap = true; totalHitOverlap += overlap; if (useOverlappingSubBlocks == true) { (*hitListIter)->block_starts.push_back(maxStart); (*hitListIter)->block_ends.push_back(minEnd); } } } } if (hitHasOverlap && useOverlappingSubBlocks == false) { bool enoughKeyOverlap = (float) totalHitOverlap / (float) keyBlocksSumLength >= _overlapFraction; bool enoughHitOverlap = (float) totalHitOverlap / (float) hitBlockSumLength >= _overlapFraction; if (enoughKeyOverlap) { if (_hasReciprocal && enoughHitOverlap) { //(*hitListIter)->setValid(true); _overlapBases.push_back(totalHitOverlap); hitListIter = 
hitList.next(); } else if (_hasReciprocal && !enoughHitOverlap) { hitList.erase(); //(*hitListIter)->setValid(false); } else if (!_hasReciprocal) { //(*hitListIter)->setValid(true); _overlapBases.push_back(totalHitOverlap); hitListIter = hitList.next(); } } else { hitList.erase(); //(*hitListIter)->setValid(false); } } else if (!hitHasOverlap && useOverlappingSubBlocks == false) { hitList.erase(); //(*hitListIter)->setValid(false); } else { hitListIter = hitList.next(); } if (deleteHitBlocks) { deleteBlocks(hitBlocks); } } // end for loop through main hits if (deleteKeyBlocks) { deleteBlocks(keyList); } return (int)hitList.size(); }
/**
 * When closest hits are resolved across all databases together, trim
 * retList (and the parallel _finalDistances) down to the _kClosest nearest
 * hits, counting a group of tied distances only once unless all ties are
 * requested.
 *
 * Does nothing when the multi-db mode is not ALL_DBS or there is only one
 * database.
 *
 * @param retList  hits gathered for the query; rebuilt in place.
 */
void CloseSweep::checkMultiDbs(RecordKeyVector &retList) {
    if (_context->getMultiDbMode() != ContextClosest::ALL_DBS || _numDBs == 1) {
        return;
    }

    // Snapshot every hit with its absolute distance and sign.
    // _finalDistances is read in parallel with retList.
    vector<distanceTuple> ranked;
    int numHits = (int) retList.size();
    ranked.resize(numHits);

    int slot = 0;
    for (RecordKeyVector::iterator_type recIter = retList.begin();
         recIter != retList.end();
         ++recIter, ++slot) {
        int signedDist = _finalDistances[slot];
        distanceTuple &entry = ranked[slot];
        entry._dist = abs(signedDist);
        entry._rec = *recIter;
        entry._isNeg = signedDist < 0;
    }

    // Order the hits by distance.
    sort(ranked.begin(), ranked.end(), DistanceTupleSortAscFunctor());

    // Count how many hits share each absolute distance.
    map<int, int> tieCounts;
    for (int k = 0; k < numHits; ++k) {
        ++tieCounts[ranked[k]._dist];
    }

    // Rebuild the outputs from scratch, walking the ranked hits one tie
    // group at a time.
    retList.clearVector();
    _finalDistances.clear();

    // hitsUsed is handed to addSingleRec() and never modified here, so it
    // is presumably advanced inside that call.
    int hitsUsed = 0;
    int pos = 0;
    while (pos < numHits && hitsUsed < _kClosest) {
        map<int, int>::iterator tieIter = tieCounts.find(ranked[pos]._dist);
        // A distance missing from the map is handled as a group of one.
        int groupLen = (tieIter != tieCounts.end()) ? tieIter->second : 1;
        int groupLast = pos + groupLen - 1;

        // Choose which members of the tie group [pos, groupLast] to report.
        int firstPick;
        int lastPick;
        if (_allTies) {
            // Report every member of the group.
            firstPick = pos;
            lastPick = groupLast;
        } else if (_firstTie) {
            // Report only the first member.
            firstPick = pos;
            lastPick = pos;
        } else {
            // Last-tie mode: report only the final member.
            firstPick = groupLast;
            lastPick = groupLast;
        }

        for (int j = firstPick; j <= lastPick; j++) {
            int dist = ranked[j]._dist;
            bool isNeg = ranked[j]._isNeg;
            // Restore the sign that was stripped for sorting.
            addSingleRec(ranked[j]._rec, (isNeg ? 0 - dist : dist), hitsUsed, retList);
        }

        // Continue with the first hit after this tie group.
        pos = groupLast + 1;
    }
}