Exemple #1
0
// Hands every record referenced by keyList to _blockRecordsMgr->deleteRecord()
// and then calls clearVector() on the list.
void BlockMgr::deleteBlocks(RecordKeyVector &keyList)
{
	// Traverse with the list's own cursor: begin() yields the first position
	// and next() yields each following one until end() is reached.
	RecordKeyVector::iterator_type cursor = keyList.begin();
	while (cursor != keyList.end()) {
		_blockRecordsMgr->deleteRecord(*cursor);
		cursor = keyList.next();
	}

	// Only clear once every element has been passed to deleteRecord().
	keyList.clearVector();
}
// Moves the sweep on to the next query record and gathers its hits.
//
// retList is cleared on entry. On success its key is set to the new query
// record, masterScan() is run on it, and it is sorted when the context asks
// for sorted output.
//
// Returns false when nextRecord(true) fails (query EOF), or when all current
// DB records are NULL, all caches are empty and _runToQueryEnd is not set
// (in that case _testLastQueryRec is also set). Returns true otherwise.
bool NewChromSweep::next(RecordKeyVector &retList) {
	retList.clearVector();

	// With no current query record this is treated as the first read of the
	// query file, which must be tested for chrom sort order. Otherwise the
	// previous query record is released before reading the next one.
	const bool isFirstQueryRead = (_currQueryRec == NULL);
	if (!isFirstQueryRead) {
		_queryFRM->deleteRecord(_currQueryRec);
	}

	if (!nextRecord(true)) {
		return false; // query EOF hit
	}
	retList.setKey(_currQueryRec);

	if (isFirstQueryRead) {
		testChromOrder(_currQueryRec);
	}

	// Nothing left to compare against, and the caller did not ask to keep
	// running through the rest of the query file.
	const bool nothingLeftToScan = allCurrDBrecsNull() && allCachesEmpty() && !_runToQueryEnd;
	if (nothingLeftToScan) {
		_testLastQueryRec = true;
		return false;
	}

	_currQueryChromName = _currQueryRec->getChrName();
	masterScan(retList);

	if (_context->getSortOutput()) {
		retList.sortVector();
	}

	// Remember this query's chromosome for the next call.
	_prevQueryChromName = _currQueryChromName;
	return true;
}
// Calls deleteRecord() on every element of recList except the list's key,
// then calls clearVector() on the list. If the key also appears as an
// element of the list, that element is skipped rather than deleted.
void FileRecordMergeMgr::deleteAllMergedItemsButKey(RecordKeyVector &recList) {
	RecordKeyVector::const_iterator_type cursor = recList.begin();
	while (cursor != recList.end()) {
		const bool isKey = (*cursor == recList.getKey());
		if (!isKey) {
			deleteRecord(*cursor);
		}
		cursor = recList.next();
	}
	recList.clearVector();
}
Exemple #4
0
// When the multi-DB mode is ALL_DBS and there is more than one database,
// narrows retList (and the parallel _finalDistances) down to the hits whose
// absolute distance equals the minimum absolute distance, then applies the
// tie mode: FIRST_TIE keeps only the first such hit, LAST_TIE keeps only the
// last, and any other mode keeps all of them. In every other configuration
// the method leaves retList untouched.
void CloseSweep::checkMultiDbs(RecordKeyVector &retList) {
	ContextClosest::tieModeType tieMode = _context->getTieMode();

	// Only applies when hits from several databases are resolved together.
	if (_context->getMultiDbMode() != ContextClosest::ALL_DBS || _numDBs <= 1) {
		return;
	}

	// Reset the scratch copies and carry the query key over.
	_copyDists.clear();
	_copyRetList.clearAll();
	_copyRetList.setKey(retList.getKey());

	// Pass 1: smallest absolute distance among all recorded distances.
	int closest = INT_MAX;
	for (int idx = 0; idx < (int)_finalDistances.size(); ++idx) {
		if (abs(_finalDistances[idx]) < closest) {
			closest = abs(_finalDistances[idx]);
		}
	}

	// Pass 2: copy out every hit sitting at that distance. _finalDistances is
	// indexed in step with the position of the hit in retList.
	int pos = 0;
	RecordKeyVector::const_iterator_type hit = retList.begin();
	while (hit != retList.end()) {
		const int signedDist = _finalDistances[pos];
		if (abs(signedDist) == closest) {
			_copyDists.push_back(signedDist);
			_copyRetList.push_back(*hit);
		}
		++hit;
		++pos;
	}

	// Rebuild the outputs from the filtered copies.
	retList.clearVector();
	_finalDistances.clear();

	if (_copyRetList.empty()) {
		return;
	}

	switch (tieMode) {
	case ContextClosest::FIRST_TIE:
		retList.push_back(*(_copyRetList.begin()));
		_finalDistances.push_back(_copyDists[0]);
		break;
	case ContextClosest::LAST_TIE:
		retList.push_back(*(_copyRetList.begin() + _copyRetList.size() - 1));
		_finalDistances.push_back(_copyDists[_copyDists.size() - 1]);
		break;
	default:
		// Any other tie mode: report every hit at the minimum distance.
		retList = _copyRetList;
		_finalDistances = _copyDists;
		break;
	}
}
Exemple #5
0
// Picks the K closest hits among multiple databases, without counting a
// group of tied hits more than once when the tie mode is "first" or "last".
//
// On entry retList holds the candidate hits and _finalDistances holds their
// signed distances at matching positions. On exit both have been rebuilt via
// addSingleRec() in ascending order of absolute distance.
//
// Does nothing unless the multi-DB mode is ALL_DBS; also does nothing when
// there is exactly one database.
void CloseSweep::checkMultiDbs(RecordKeyVector &retList) {
    // Can skip this method if there's only one DB, or if we are
    // resolving closest hits for each db instead of all of them.
    if (_context->getMultiDbMode() != ContextClosest::ALL_DBS || _numDBs == 1) return;

    // Enter all hits into a vector of distance tuples: absolute distance,
    // the record, and whether the original distance was negative.
    vector<distanceTuple> copyDists;
    const int numHits = (int)retList.size();
    copyDists.resize(numHits);
    int hitIdx = 0;
    for (RecordKeyVector::iterator_type iter = retList.begin(); iter != retList.end(); iter++) {
        int dist = _finalDistances[hitIdx];
        copyDists[hitIdx]._dist = abs(dist);
        copyDists[hitIdx]._rec = *iter;
        copyDists[hitIdx]._isNeg = dist < 0;
        hitIdx++;
    }

    // Sort the hits by distance.
    sort(copyDists.begin(), copyDists.end(), DistanceTupleSortAscFunctor());

    // Count how many hits share each absolute distance:
    // key = absolute distance, value = number of hits at that distance.
    map<int, int> ties;
    for (vector<distanceTuple>::iterator tupleIter = copyDists.begin(); tupleIter != copyDists.end(); ++tupleIter)
        ++ties[tupleIter->_dist];

    // Clear the original list and distances, and re-populate until we have
    // the desired number of hits, skipping over any unwanted ties.
    retList.clearVector();
    _finalDistances.clear();

    int hitsUsed = 0; // passed to addSingleRec(); presumably advanced there — confirm
    for (int i = 0; i < numHits && hitsUsed < _kClosest; i++) {
        // Every distance stored in copyDists was counted into `ties` above,
        // so this lookup always succeeds and numTies >= 1. Each iteration
        // consumes a whole run of equal distances, so i is always at the
        // first member of its run.
        const int numTies = ties.find(copyDists[i]._dist)->second;
        const int lastOfGroup = i + numTies - 1;

        // Decide which members of the run [i, lastOfGroup] get reported:
        // all of them, only the first, or only the last.
        int firstToAdd = i;
        int lastToAdd = lastOfGroup;
        if (!_allTies) {
            if (_firstTie) {
                lastToAdd = i;
            } else { // tieMode == LAST_TIE
                firstToAdd = lastOfGroup;
            }
        }

        for (int j = firstToAdd; j <= lastToAdd; j++) {
            int dist = copyDists[j]._dist;
            bool isNeg = copyDists[j]._isNeg;
            // Restore the sign that was stripped when taking abs().
            addSingleRec(copyDists[j]._rec, (isNeg ? 0 - dist : dist), hitsUsed, retList);
        }

        // Jump to the end of the run; the loop increment then moves on to
        // the first hit of the next distinct distance.
        i = lastOfGroup;
    }
}
// Subtracts the hits in `hits` from the list's key (query) record.
//
// Outcomes, in the order they are checked:
//  - no hits: the key itself is pushed into `hits`, for reporting only.
//  - removeAll with a subtract fraction of 0.0: _dontReport is set.
//  - no hit qualified for removal: `hits` is reset to contain only the key.
//  - removeAll and at least one hit qualified: _dontReport is set.
//  - removeSum (-N): _dontReport is set if the fraction of covered key bases
//    exceeds the subtract fraction; otherwise `hits` is reset to the key.
//  - otherwise: `hits` is replaced by one temporary record per remaining
//    stretch of uncovered key bases, and _deleteTmpBlocks is set.
void SubtractFile::subtractHits(RecordKeyVector &hits) {
	if (hits.empty()) {
		// no intersection, nothing to subtract.
		// just copy key to hits as if it were a
		// self-intersection. This is just for reporting
		// purposes.
		hits.push_back(hits.getKey());
		return;
	}

	if (upCast(_context)->getRemoveAll() && upCast(_context)->getSubtractFraction() == 0.0) {
		// hits aren't empty, meaning there is intersection,
		// so we want to not report the hit.
		_dontReport = true;
		return;
	}

	// Loop through hits. Track which bases in query were covered.
	Record *keyRec = hits.getKey();
	const int keyStart = keyRec->getStartPos();
	const int keyEnd = keyRec->getEndPos();
	const int keyLen = keyEnd - keyStart;

	// This vector of bools represents the bases of the query.
	// For each base, true means uncovered, false means covered.
	// They begin as all uncovered.
	vector<bool> keyBases(keyLen, true);

	// Now loop through the hits, and cover corresponding query bases
	// by setting them to false.
	bool basesRemoved = false;
	for (RecordKeyVector::iterator_type iter = hits.begin(); iter != hits.end(); iter = hits.next()) {
		Record *hitRec = *iter;
		int hitStart = hitRec->getStartPos();
		int hitEnd = hitRec->getEndPos();

		// Clip the hit to the key and convert to offsets into keyBases.
		int startIdx = max(keyStart, hitStart) - keyStart;
		int endIdx = min(keyEnd, hitEnd) - keyStart;

		int coveredLen = endIdx - startIdx;
		float coveragePct = (float)coveredLen / (float)keyLen;

		// For each base in the hit, set the base in the query to false.
		// This effectively "erases" the covered bits. Only do so when
		// removeSum is set or this hit alone covers at least the required
		// fraction of the key.
		if (upCast(_context)->getRemoveSum() || coveragePct >= upCast(_context)->getSubtractFraction()) {
			// An inverted range would be undefined behaviour for std::fill,
			// so only fill when the clipped hit is non-empty.
			if (startIdx < endIdx) {
				std::fill(keyBases.begin() + startIdx, keyBases.begin() + endIdx, false);
			}
			basesRemoved = true;
		}
	}

	if (!basesRemoved) {
		// treat as if there were no intersection
		hits.clearVector();
		hits.push_back(hits.getKey());
		return;
	}
	if (upCast(_context)->getRemoveAll()) {
		_dontReport = true;
		return;
	}

	// If the -N option is used (removeSum), do not report if the percentage
	// of uniquely covered bases exceeds the overlap fraction.
	if (upCast(_context)->getRemoveSum()) {
		// Summing the bools counts the bases that are still uncovered.
		int numBasesUncovered = std::accumulate(keyBases.begin(), keyBases.end(), 0);
		// Determine the fraction of the key that is covered.
		float pctCovered = 1.0 - (float)numBasesUncovered / (float)keyLen;
		if (pctCovered > upCast(_context)->getSubtractFraction()) {
			_dontReport = true;
		} else {
			hits.clearVector();
			hits.push_back(hits.getKey());
		}
		return;
	}

	// Now make "blocks" out of the query's remaining stretches of
	// uncovered bases.
	hits.clearVector();
	const int numBases = (int)keyBases.size();
	for (int i = 0; i < numBases; i++) {
		if (!keyBases[i]) {
			continue;
		}
		int blockStart = keyStart + i;
		// Advance to the end of this uncovered run. The bounds check must
		// come first so keyBases is never indexed at numBases.
		while (i < numBases && keyBases[i]) {
			i++;
		}
		int blockEnd = min(keyStart + i, keyEnd);
		hits.push_back(_tmpBlocksMgr->allocateAndAssignRecord(keyRec, blockStart, blockEnd));
	}
	_deleteTmpBlocks = true;
}