// Constructs the subtract tool on top of IntersectFile, clears the
// bookkeeping flags, and creates the BlockMgr that subtractHits() later uses
// to allocate the records for the remaining (uncovered) pieces of a query.
SubtractFile::SubtractFile(ContextSubtract *context)
: IntersectFile(context),
  _tmpBlocksMgr(NULL),
  _deleteTmpBlocks(false),
  _dontReport(false)
{
	// The block manager is configured from the context's overlap fraction
	// (for A) and reciprocal-fraction settings.
	_tmpBlocksMgr = new BlockMgr(
			upCast(_context)->getOverlapFractionA(),
			upCast(_context)->getReciprocalFraction());
}
Beispiel #2
0
bool Fisher::init(void)
{
	if(!(upCast(_context)->getExcludeFile().empty())){
		string ex = upCast(_context)->getExcludeFile();
		_excludeFile = new BedFile(ex);
		_excludeFile->loadBedFileIntoMergedMap();
		_haveExclude = true;
	}
	return Jaccard::init();
}
Beispiel #3
0
// Closes out the sweep when the input was sorted and no genome file was
// supplied. The flag handed to closeOut() is the negation of the context's
// "name check disabled" setting. Always returns true.
bool IntersectFile::finalizeCalculations()
{
    const bool needsCloseOut = upCast(_context)->getSortedInput()
                               && !upCast(_context)->hasGenomeFile();
    if (needsCloseOut) {
        const bool nameCheckEnabled = !_context->getNameCheckDisabled();
        _sweep->closeOut(nameCheckEnabled);
    }
    return true;
}
Beispiel #4
0
// Decides whether a record passes the strand filter.
// Returns true when the same-strand option is off. Otherwise the record
// passes only if forward-only is set and the record is on the forward
// strand, or reverse-only is set and the record is on the reverse strand.
bool SampleFile::strandComplies(const Record * record) {
	// No strand restriction requested: every record complies.
	if (!upCast(_context)->getSameStrand()) {
		return true;
	}

	return (upCast(_context)->getForwardOnly() && record->getStrandVal() == Record::FORWARD)
		|| (upCast(_context)->getReverseOnly() && record->getStrandVal() == Record::REVERSE);
}
Beispiel #5
0
// Prepares the tool for searching: remembers the query file reader, then
// either builds a bin tree and loads the database (unsorted input) or
// creates and initializes a sweep (sorted input).
// Returns the sweep's init() result for sorted input, true otherwise.
bool IntersectFile::init() {

	_queryFRM = upCast(_context)->getFile(upCast(_context)->getQueryFileIdx());

	// Unsorted input: use the bin tree.
	if (!upCast(_context)->getSortedInput()) {
		_binTree = new BinTree(upCast(_context));
		_binTree->loadDB();
		return true;
	}

	// Sorted input: use the sweep.
	makeSweep();
	return _sweep->init();
}
bool GroupBy::init()
{
	Tokenizer groupColsTokens;
	groupColsTokens.tokenize(upCast(_context)->getGroupCols(), ',');
	int numElems = groupColsTokens.getNumValidElems();
	for (int i=0; i < numElems; i++) {
		//if the item is a range, such as 3-5,
		//must split that as well.
		const QuickString &elem = groupColsTokens.getElem(i);

		if (strchr(elem.c_str(), '-')) {
			Tokenizer rangeElems;
			rangeElems.tokenize(elem, '-');
			int startNum = str2chrPos(rangeElems.getElem(0));
			int endNum = str2chrPos(rangeElems.getElem(1));
			for (int i=startNum; i <= endNum; i++) {
				_groupCols.push_back(i);
			}
		} else {
			_groupCols.push_back(str2chrPos(elem));
		}
	}
	_queryFRM = _context->getFile(0);
	_prevFields.resize(_groupCols.size());

	_prevRecord = getNextRecord();
	return true;
}
Beispiel #7
0
// Sums the bases shared between the key record of recList and each of its
// hits, and returns that sum.
// Side effects: adds the hit count to _overlapCounts and _numIntersections,
// and appends to _qsizes the key length followed by each hit's clipped
// overlap length.
// When the context obeys splits, the per-hit contribution comes from the
// split-block info instead of the clipped start/end difference.
unsigned long Fisher::getTotalIntersection(RecordKeyVector &recList)
{
    Record *keyRec = recList.getKey();
    const CHRPOS keyStart = keyRec->getStartPos();
    const CHRPOS keyEnd = keyRec->getEndPos();

    _overlapCounts += recList.size();
    // note that we truncate to a max size of 2.1GB
    _qsizes.push_back(static_cast<int>(keyEnd - keyStart));

    unsigned long totalBases = 0;
    int splitIdx = 0;
    RecordKeyVector::iterator_type hit = recList.begin();
    while (hit != recList.end()) {
        // Clip the hit to the key's extent.
        const CHRPOS clippedStart = max((*hit)->getStartPos(), keyStart);
        const CHRPOS clippedEnd = min((*hit)->getEndPos(), keyEnd);
        const CHRPOS overlapLen = clippedEnd - clippedStart;
        _qsizes.push_back(static_cast<int>(overlapLen));

        if (_context->getObeySplits()) {
            totalBases += upCast(_context)->getSplitBlockInfo()->getOverlapBases(splitIdx);
            splitIdx++;
        } else {
            totalBases += static_cast<unsigned long>(overlapLen);
        }
        hit = recList.next();
    }

    _numIntersections += static_cast<int>(recList.size());
    return totalBases;
}
Beispiel #8
0
// Prints the summary histogram lines, one per entry of _finalDepthMap.
// Does nothing unless the coverage type is HIST. Each line starts with
// "all", followed by the depth, the number of bases at that depth and the
// total query length, tab-separated.
void CoverageFile::giveFinalReport(RecordOutputMgr *outputMgr) {

	// Only the histogram option produces a final report.
	if (upCast(_context)->getCoverageType() != ContextCoverage::HIST) {
		return;
	}

	depthMapType::iterator entry = _finalDepthMap.begin();
	while (entry != _finalDepthMap.end()) {
		size_t depthVal = entry->first;
		size_t numBases = entry->second;
		float fraction = static_cast<float>(numBases) / static_cast<float>(_totalQueryLen);

		_finalOutput = "all\t";
		_finalOutput.append(depthVal);
		_finalOutput.append("\t");
		_finalOutput.append(numBases);
		_finalOutput.append("\t");
		_finalOutput.append(_totalQueryLen);
		_finalOutput.append("\t");
		// NOTE(review): format() presumably appends the formatted fraction
		// to _finalOutput -- confirm against its definition.
		format(fraction);

		outputMgr->printRecord(NULL, _finalOutput);
		entry++;
	}

}
Beispiel #9
0
// Builds the depth counts for the hits, clears the output buffer, and then
// dispatches to the reporting routine that matches the coverage type chosen
// in the context. Unrecognized types are handled like DEFAULT.
void CoverageFile::processHits(RecordOutputMgr *outputMgr, RecordKeyVector &hits) {
	makeDepthCount(hits);
	_finalOutput.clear();

	switch (upCast(_context)->getCoverageType()) {
	case ContextCoverage::COUNT:
		doCounts(outputMgr, hits);
		return;
	case ContextCoverage::PER_BASE:
		doPerBase(outputMgr, hits);
		return;
	case ContextCoverage::MEAN:
		doMean(outputMgr, hits);
		return;
	case ContextCoverage::HIST:
		doHist(outputMgr, hits);
		return;
	case ContextCoverage::DEFAULT:
	default:
		doDefault(outputMgr, hits);
		return;
	}
}
Beispiel #10
0
// Constructs the tool from its context. The sweep, bin tree and query reader
// pointers start out NULL; init() and makeSweep() assign them later.
IntersectFile::IntersectFile(ContextIntersect *context)
	: ToolBase(upCast(context)), _sweep(NULL), _binTree(NULL), _queryFRM(NULL)
{
}
Beispiel #11
0
// When the context obeys splits, recomputes the overlaps block by block and
// replaces the contents of hitSet with the outcome. Does nothing otherwise.
void IntersectFile::checkSplits(RecordKeyVector &hitSet)
{
	if (!upCast(_context)->getObeySplits()) {
		return;
	}

	RecordKeyVector keySet(hitSet.getKey());
	RecordKeyVector resultSet(hitSet.getKey());
	RecordKeyVector overlapSet(hitSet.getKey());
	upCast(_context)->getSplitBlockInfo()->findBlockedOverlaps(keySet, hitSet, resultSet, overlapSet);

	// when using coverage, we need a list of the sub-intervals of coverage
	// so that per-base depth can be properly calculated when obeying splits.
	// Every other program gets the result set.
	const bool isCoverage = (_context->getProgram() == ContextBase::COVERAGE);
	hitSet.swap(isCoverage ? overlapSet : resultSet);
}
Beispiel #12
0
// Fetches the next set of hits using the sorted or unsorted search,
// depending on the context. When a result was found, the hits are passed
// through checkSplits(). Returns whether a result was found.
bool IntersectFile::findNext(RecordKeyVector &hits)
{
	const bool found = upCast(_context)->getSortedInput()
			? nextSortedFind(hits)
			: nextUnsortedFind(hits);

	if (found) {
		checkSplits(hits);
	}
	return found;
}
bool GroupBy::canGroup(const Record *newRecord) {

	for (int i=0; i < (int)_groupCols.size(); i++) {
		int fieldNum = _groupCols[i];
		const QuickString &newField = newRecord->getField(fieldNum);
		const QuickString &oldField = _prevFields[i];
		if (upCast(_context)->ignoreCase()) {
			if (oldField.stricmp(newField)) return false;
		} else {
			if (oldField != newField) return false;
		}
	}
	return true;

}
// Prints one output line for a group. With the full-columns option the key
// record is printed together with the column-operation value; otherwise only
// the grouping columns are printed, tab-separated, followed by that value.
void GroupBy::processHits(RecordOutputMgr *outputMgr, RecordKeyVector &hits)
{
	const Record *keyRec = hits.getKey();
	const QuickString &opsResult = _context->getColumnOpsVal(hits);

	if (upCast(_context)->printFullCols()) {
		outputMgr->printRecord(keyRec, opsResult);
		return;
	}

	// Assemble "<col>\t<col>\t...<opsResult>" from the grouping columns.
	QuickString line;
	const int numCols = (int)_groupCols.size();
	for (int colIdx = 0; colIdx < numCols; ++colIdx) {
		line.append(keyRec->getField(_groupCols[colIdx]));
		line.append('\t');
	}
	line.append(opsResult);
	outputMgr->printRecord(NULL, line);
}
Beispiel #15
0
// Sums the bases shared between the key record of hits and each hit, and
// returns that sum. Also adds the number of hits to _numIntersections.
// When the context obeys splits, the per-hit contribution comes from the
// split-block info instead of the clipped start/end difference.
unsigned long Jaccard::getTotalIntersection(RecordKeyVector &hits)
{
	Record *keyRec = hits.getKey();
	const CHRPOS keyStart = keyRec->getStartPos();
	const CHRPOS keyEnd = keyRec->getEndPos();

	unsigned long totalBases = 0;
	int splitIdx = 0;
	RecordKeyVector::iterator_type pos = hits.begin();
	while (pos != hits.end()) {
		Record *hitRec = *pos;
		// Clip the hit to the key's extent.
		const CHRPOS clippedStart = max(hitRec->getStartPos(), keyStart);
		const CHRPOS clippedEnd = min(hitRec->getEndPos(), keyEnd);

		if (_context->getObeySplits()) {
			totalBases += upCast(_context)->getSplitBlockInfo()->getOverlapBases(splitIdx);
			splitIdx++;
		} else {
			totalBases += static_cast<unsigned long>(clippedEnd - clippedStart);
		}
		pos = hits.next();
	}

	_numIntersections += static_cast<int>(hits.size());
	return totalBases;
}
Beispiel #16
0
// Looks this span up again through a neighbour: via the previous span's
// next() when fPrev is set, otherwise via the next span's prev().
// In debug builds, asserts that the lookup yields this span unless the span
// has been marked deleted.
SkOpSpanBase* SkOpSpanBase::active() {
    SkOpSpanBase* result;
    if (fPrev) {
        result = fPrev->next();
    } else {
        result = upCast()->next()->prev();
    }
    SkASSERT(this == result || fDebugDeleted);
    return result;
}
Beispiel #17
0
void IntersectFile::makeSweep() {
	_sweep = new NewChromSweep(upCast(_context));
}
Beispiel #18
0
bool ComplementFile::init()
{
	_frm = static_cast<FileRecordMergeMgr *>(upCast(_context)->getFile(0));
	return true;
}
Beispiel #19
0
// Subtracts the hits from the query (key) record of `hits` and rewrites
// `hits` to describe what should be reported:
//   - no hits, or no hit qualified for removal: `hits` holds just the key,
//     as if it were a self-intersection;
//   - remove-all option with at least one qualifying hit: _dontReport is set;
//   - -N option (removeSum): _dontReport is set when the covered fraction
//     exceeds the subtract fraction, otherwise `hits` holds just the key;
//   - otherwise: `hits` holds one newly allocated record per remaining
//     stretch of uncovered bases, and _deleteTmpBlocks is set.
void SubtractFile::subtractHits(RecordKeyVector &hits) {
	if (hits.empty()) {
		// no intersection, nothing to subtract.
		// just copy key to hits as if it were a
		// self-intersection. This is just for reporting
		// purposes.
		hits.push_back(hits.getKey());
		return;
	}

	if (upCast(_context)->getRemoveAll() && upCast(_context)->getSubtractFraction() == 0.0) {
		// hits aren't empty, meaning there is intersection,
		// so we want to not report the hit.
		_dontReport = true;
		return;
	}

	//loop through hits. Track which bases in query were covered
	Record *keyRec = hits.getKey();
	int keyStart = keyRec->getStartPos();
	int keyEnd = keyRec->getEndPos();
	const int keyLen = keyEnd - keyStart;

	//this vector of bools will represent the bases of the query.
	//for each base, true means uncovered, false means covered.
	//they begin as all uncovered.
	vector<bool> keyBases(keyLen, true);

	//now loop through the hits, and cover corresponding query bases
	//by setting them to false.
	bool basesRemoved = false;
	for (RecordKeyVector::iterator_type iter = hits.begin(); iter != hits.end(); iter = hits.next()) {
		Record *hitRec = *iter;
		int hitStart = hitRec->getStartPos();
		int hitEnd = hitRec->getEndPos();

		//clip the hit to the key and express it as offsets into keyBases.
		int startIdx = max(keyStart, hitStart) - keyStart;
		int endIdx = min(keyEnd, hitEnd) - keyStart;

		int coveredLen = endIdx - startIdx;
		float coveragePct = (float)coveredLen / (float)keyLen;
		//for each base in the hit, set the base in the query to false.
		//this effectively "erases" the covered bits. Only do so when
		//the -N option is used or this hit alone covers enough of the key.
		if (upCast(_context)->getRemoveSum() || coveragePct >= upCast(_context)->getSubtractFraction()) {
			//a hit that does not actually overlap the key gives
			//endIdx <= startIdx; passing such an inverted range to
			//std::fill is undefined behavior, so skip the fill.
			if (startIdx < endIdx) {
				std::fill(keyBases.begin() + startIdx, keyBases.begin() + endIdx, false);
			}
			basesRemoved = true;
		}
	}

	if (!basesRemoved) {
		//treat as if there were no intersection
		hits.clearVector();
		hits.push_back(hits.getKey());
		return;
	} else if (upCast(_context)->getRemoveAll()) {
		_dontReport = true;
		return;
	}
	// if the -N option is used ( removeSum), do not report if the percentage of
	// uniquely covered bases exceeds the overlap fraction.
	if (upCast(_context)->getRemoveSum()) {
		//determine how many bases are left uncovered.
		int numBasesUncovered = std::accumulate(keyBases.begin(), keyBases.end(), 0);
		//determine percentage that are covered.
		float pctCovered = 1.0 - (float)numBasesUncovered / (float)keyLen;
		if (pctCovered > upCast(_context)->getSubtractFraction()) {
			_dontReport = true;
			return;
		} else {
			hits.clearVector();
			hits.push_back(hits.getKey());
		}
		return;
	}

	//now make "blocks" out of the query's remaining stretches of
	//uncovered bases.
	hits.clearVector();
	const int numBases = (int)keyBases.size();
	for (int i = 0; i < numBases; i++) {
		if (keyBases[i]) {
			int blockStart = keyStart + i;
			//advance to the end of this uncovered run. The bounds check
			//must come first: testing keyBases[i] before checking
			//i < numBases reads one past the end of the vector whenever
			//the run extends to the last base of the key.
			while (i < numBases && keyBases[i]) {
				i++;
			}
			int blockEnd = min(keyStart + i, keyEnd);
			hits.push_back(_tmpBlocksMgr->allocateAndAssignRecord(keyRec, blockStart, blockEnd));
		}
	}
	_deleteTmpBlocks = true;

}
Beispiel #20
0
// Creates a JIT scalar node holding `val` and wraps it in a node array of
// the requested size.
Array<T>* createScalarNode(const dim4 &size, const T val)
{
    JIT::ScalarNode *scalar = new JIT::ScalarNode(upCast(val), isDouble<T>());
    JIT::Node *asNode = reinterpret_cast<JIT::Node *>(scalar);
    return createNodeArray<T>(size, asNode);
}