// Hands every record currently held in keyList back to the block record
// manager, then empties the list.
void BlockMgr::deleteBlocks(RecordKeyVector &keyList) {
    // RecordKeyVector is walked with its own begin()/next() cursor pair
    // rather than by incrementing the iterator directly.
    RecordKeyVector::iterator_type blockIter = keyList.begin();
    while (blockIter != keyList.end()) {
        _blockRecordsMgr->deleteRecord(*blockIter);
        blockIter = keyList.next();
    }
    keyList.clearVector();
}
// Advances the sweep to the next query record and fills retList with the
// results of masterScan() for it.
//
// retList is cleared on entry and its key is set to the new query record.
// Returns false when nextRecord() reports that no further query record is
// available, or when every database record is NULL, every cache is empty and
// _runToQueryEnd is not set (in which case _testLastQueryRec is raised).
// Returns true otherwise.
bool NewChromSweep::next(RecordKeyVector &retList) {
    retList.clearVector();

    // No current query record means nothing has been read yet, so the record
    // fetched below is the first one and must be tested for chrom sort order.
    const bool isFirstQueryRec = (_currQueryRec == NULL);
    if (!isFirstQueryRec) {
        // Release the record handed out by the previous call.
        _queryFRM->deleteRecord(_currQueryRec);
    }

    if (!nextRecord(true)) {
        return false; // query EOF hit
    }

    retList.setKey(_currQueryRec);
    if (isFirstQueryRec) {
        testChromOrder(_currQueryRec);
    }

    if (allCurrDBrecsNull() && allCachesEmpty() && !_runToQueryEnd) {
        _testLastQueryRec = true;
        return false;
    }

    _currQueryChromName = _currQueryRec->getChrName();
    masterScan(retList);

    if (_context->getSortOutput()) {
        retList.sortVector();
    }

    _prevQueryChromName = _currQueryChromName;
    return true;
}
// Deletes every record in recList except the one that is also the list's key,
// then empties the list. The key record itself is never deleted here, even if
// it appears among the list's elements.
void FileRecordMergeMgr::deleteAllMergedItemsButKey(RecordKeyVector &recList) {
    RecordKeyVector::const_iterator_type cursor = recList.begin();
    while (cursor != recList.end()) {
        if (*cursor != recList.getKey()) {
            deleteRecord(*cursor);
        }
        cursor = recList.next();
    }
    recList.clearVector();
}
// When hits from several databases are being resolved together (ALL_DBS mode
// with more than one database), reduces retList and _finalDistances to the
// hits whose absolute distance is the overall minimum, then applies the tie
// mode: keep only the first, only the last, or all of those minimal hits.
// In any other configuration the list is left untouched.
void CloseSweep::checkMultiDbs(RecordKeyVector &retList) {
    ContextClosest::tieModeType tieMode = _context->getTieMode();

    if (_context->getMultiDbMode() != ContextClosest::ALL_DBS || _numDBs <= 1) {
        return;
    }

    // Reset the scratch containers and give the scratch list the same key.
    _copyDists.clear();
    _copyRetList.clearAll();
    _copyRetList.setKey(retList.getKey());

    // Pass 1: smallest absolute distance among all recorded distances.
    int closest = INT_MAX;
    for (int d = 0; d < (int)_finalDistances.size(); d++) {
        const int magnitude = abs(_finalDistances[d]);
        if (magnitude < closest) {
            closest = magnitude;
        }
    }

    // Pass 2: collect every hit at that distance. _finalDistances is read in
    // step with retList, i.e. the two are treated as parallel sequences.
    int pos = 0;
    for (RecordKeyVector::const_iterator_type hit = retList.begin(); hit != retList.end(); ++hit, ++pos) {
        const int dist = _finalDistances[pos];
        if (abs(dist) != closest) {
            continue;
        }
        _copyDists.push_back(dist);
        _copyRetList.push_back(*hit);
    }

    retList.clearVector();
    _finalDistances.clear();
    if (_copyRetList.empty()) {
        return;
    }

    switch (tieMode) {
    case ContextClosest::FIRST_TIE:
        retList.push_back(*(_copyRetList.begin()));
        _finalDistances.push_back(_copyDists[0]);
        break;
    case ContextClosest::LAST_TIE:
        retList.push_back(*(_copyRetList.begin() + _copyRetList.size() - 1));
        _finalDistances.push_back(_copyDists[_copyDists.size() - 1]);
        break;
    default:
        // Any other tie mode keeps all of the minimal hits.
        retList = _copyRetList;
        _finalDistances = _copyDists;
        break;
    }
}
// Picks the closest hits across multiple databases, counting a group of
// equally distant hits only once when the tie mode is "first" or "last".
//
// On return retList has been rebuilt through addSingleRec() in ascending
// order of absolute distance; _finalDistances is cleared here before the
// rebuild. Selection stops once hitsUsed reaches _kClosest (hitsUsed is
// handed to addSingleRec, which presumably advances it -- confirm there).
void CloseSweep::checkMultiDbs(RecordKeyVector &retList) {
    // Nothing to reconcile with a single DB, or when closest hits are
    // resolved per database rather than across all of them.
    if (_context->getMultiDbMode() != ContextClosest::ALL_DBS || _numDBs == 1) {
        return;
    }

    // Snapshot each hit with its absolute distance and original sign.
    // _finalDistances is read in step with retList.
    const int numHits = (int)retList.size();
    vector<distanceTuple> sortedHits(numHits);
    int slot = 0;
    for (RecordKeyVector::iterator_type hitIter = retList.begin(); hitIter != retList.end(); ++hitIter, ++slot) {
        const int signedDist = _finalDistances[slot];
        distanceTuple &entry = sortedHits[slot];
        entry._dist = abs(signedDist);
        entry._rec = *hitIter;
        entry._isNeg = signedDist < 0;
    }

    sort(sortedHits.begin(), sortedHits.end(), DistanceTupleSortAscFunctor());

    // Absolute distance -> number of hits sharing it. After the sort those
    // hits are expected to sit next to each other, forming one tie group.
    map<int, int> tieCounts;
    for (int k = 0; k < numHits; k++) {
        tieCounts[sortedHits[k]._dist]++;
    }

    // Rebuild the outputs from scratch, one tie group at a time.
    retList.clearVector();
    _finalDistances.clear();

    int hitsUsed = 0;
    int pos = 0;
    while (pos < numHits && hitsUsed < _kClosest) {
        map<int, int>::iterator tieIter = tieCounts.find(sortedHits[pos]._dist);
        if (tieIter == tieCounts.end()) {
            // Fallback only: every distance was counted above, so the lookup
            // is expected to succeed.
            distanceTuple &lone = sortedHits[pos];
            const int loneDist = lone._isNeg ? 0 - lone._dist : lone._dist;
            addSingleRec(lone._rec, loneDist, hitsUsed, retList);
            pos++;
            continue;
        }

        const int groupSize = tieIter->second;
        const int groupEnd = pos + groupSize; // one past the last tied hit

        if (_allTies) {
            // ALL_TIES: report every member of the group.
            for (int member = pos; member < groupEnd; member++) {
                distanceTuple &tied = sortedHits[member];
                const int tiedDist = tied._isNeg ? 0 - tied._dist : tied._dist;
                addSingleRec(tied._rec, tiedDist, hitsUsed, retList);
            }
        } else {
            // FIRST_TIE keeps the group's first member, LAST_TIE its last.
            const int chosen = _firstTie ? pos : groupEnd - 1;
            distanceTuple &pick = sortedHits[chosen];
            const int pickDist = pick._isNeg ? 0 - pick._dist : pick._dist;
            addSingleRec(pick._rec, pickDist, hitsUsed, retList);
        }

        // Continue with the first hit after this tie group.
        pos = groupEnd;
    }
}
// Turns the list of hits for a query (the key of `hits`) into what should be
// reported after subtraction.
//
// Outcomes, in the order they are decided:
//  - no hits: the key itself is pushed into `hits` (reported unchanged).
//  - getRemoveAll() with a subtract fraction of 0.0: _dontReport is set.
//  - no hit qualified for removal: `hits` is reset to just the key.
//  - getRemoveAll() and at least one qualifying hit: _dontReport is set.
//  - getRemoveSum(): _dontReport is set if the fraction of covered query
//    bases exceeds the subtract fraction, otherwise `hits` is reset to just
//    the key.
//  - otherwise: `hits` is replaced by newly allocated records from
//    _tmpBlocksMgr, one per contiguous stretch of uncovered query bases, and
//    _deleteTmpBlocks is set.
void SubtractFile::subtractHits(RecordKeyVector &hits) {
    if (hits.empty()) {
        // no intersection, nothing to subtract.
        // just copy key to hits as if it were a
        // self-intersection. This is just for reporting
        // purposes.
        hits.push_back(hits.getKey());
        return;
    }

    if (upCast(_context)->getRemoveAll() && upCast(_context)->getSubtractFraction() == 0.0) {
        // hits aren't empty, meaning there is intersection,
        // so we want to not report the hit.
        _dontReport = true;
        return;
    }

    //loop through hits. Track which bases in query were covered
    Record *keyRec = hits.getKey();
    int keyStart = keyRec->getStartPos();
    int keyEnd = keyRec->getEndPos();
    int keyLen = keyEnd - keyStart;

    //this vector of bools will represent the bases of the query.
    //for each base, true means uncovered, false means covered.
    //they begin as all uncovered.
    vector<bool> keyBases(keyLen, true);

    //now loop through the hits, and cover corresponding query bases
    //by setting them to false.
    bool basesRemoved = false;
    for (RecordKeyVector::iterator_type iter = hits.begin(); iter != hits.end(); iter = hits.next()) {
        Record *hitRec = *iter;
        int hitStart = hitRec->getStartPos();
        int hitEnd = hitRec->getEndPos();

        // Clip the hit to the query and express it as offsets into keyBases.
        int startIdx = max(keyStart, hitStart) - keyStart;
        int endIdx = min(keyEnd, hitEnd) - keyStart;

        int coveredLen = endIdx - startIdx;
        float coveragePct = (float)coveredLen / (float)keyLen;

        //for each base in the hit, set the base in the query to false.
        //this effectively "erases" the covered bits. Only do so when
        //removeSum is set, or when this hit alone covers at least the
        //subtract fraction of the query.
        if (upCast(_context)->getRemoveSum() || coveragePct >= upCast(_context)->getSubtractFraction()) {
            std::fill(keyBases.begin() + startIdx, keyBases.begin() + endIdx, false);
            basesRemoved = true;
        }
    }

    if (!basesRemoved) {
        //treat as if there were no intersection
        hits.clearVector();
        hits.push_back(hits.getKey());
        return;
    } else if (upCast(_context)->getRemoveAll()) {
        _dontReport = true;
        return;
    }

    // if the -N option is used ( removeSum), do not report if the percentage of
    // uniquely covered bases exceeds the overlap fraction.
    if (upCast(_context)->getRemoveSum()) {
        //determine how many bases are left uncovered.
        int numBasesUncovered = std::accumulate(keyBases.begin(), keyBases.end(), 0);
        //determine percentage that are covered.
        float pctCovered = 1.0 - (float)numBasesUncovered / (float)keyLen;
        if (pctCovered > upCast(_context)->getSubtractFraction()) {
            _dontReport = true;
            return;
        } else {
            hits.clearVector();
            hits.push_back(hits.getKey());
        }
        return;
    }

    //now make "blocks" out of the query's remaining stretches of
    //uncovered bases.
    hits.clearVector();
    for (int i = 0; i < (int)keyBases.size(); i++) {
        if (keyBases[i] == true) {
            int blockStart = keyStart + i;
            // The bound must be tested before indexing: when an uncovered run
            // extends to the end of the query, i reaches keyBases.size() and
            // keyBases[i] would otherwise be read out of range.
            while (i < (int)keyBases.size() && keyBases[i] == true) {
                i++;
            }
            int blockEnd = min(keyStart + i, keyEnd);
            hits.push_back(_tmpBlocksMgr->allocateAndAssignRecord(keyRec, blockStart, blockEnd));
        }
    }
    _deleteTmpBlocks = true;
}