// SubtractFile constructor: builds on IntersectFile and allocates the
// temporary block manager used to assemble the uncovered sub-intervals
// produced by subtraction (see subtractHits).
SubtractFile::SubtractFile(ContextSubtract *context)
:   IntersectFile(context),
    // Fix: allocate directly in the initializer list instead of the
    // original's redundant NULL-initialize-then-assign-in-body. The
    // ctor parameter is used (rather than _context) so the member does
    // not depend on base-class initialization having run first.
    _tmpBlocksMgr(new BlockMgr(upCast(context)->getOverlapFractionA(),
                               upCast(context)->getReciprocalFraction())),
    _deleteTmpBlocks(false),
    _dontReport(false)
{
}
bool Fisher::init(void) { if(!(upCast(_context)->getExcludeFile().empty())){ string ex = upCast(_context)->getExcludeFile(); _excludeFile = new BedFile(ex); _excludeFile->loadBedFileIntoMergedMap(); _haveExclude = true; } return Jaccard::init(); }
// Final cleanup after all records are processed. For sorted input
// without a genome file, the chrom sweep must be closed out;
// closeOut(true) enables the chrom-name sanity check unless the user
// disabled it.
bool IntersectFile::finalizeCalculations() {
    if (upCast(_context)->getSortedInput() && !upCast(_context)->hasGenomeFile()) {
        // Single call: the argument is simply the inverse of the
        // "name check disabled" flag.
        _sweep->closeOut(!_context->getNameCheckDisabled());
    }
    return true;
}
// Decide whether a record satisfies the strand restriction in effect.
// Returns true when no restriction was requested, or when the record's
// strand matches the requested forward-only / reverse-only mode.
bool SampleFile::strandComplies(const Record * record) {
    // No strandedness requested: everything complies.
    if (!upCast(_context)->getSameStrand()) {
        return true;
    }
    // Restricted mode: comply only when the record matches the
    // requested strand.
    return (upCast(_context)->getForwardOnly() && record->getStrandVal() == Record::FORWARD)
        || (upCast(_context)->getReverseOnly() && record->getStrandVal() == Record::REVERSE);
}
bool IntersectFile::init() { _queryFRM = upCast(_context)->getFile(upCast(_context)->getQueryFileIdx()); if (upCast(_context)->getSortedInput()) { makeSweep(); return _sweep->init(); } else { _binTree = new BinTree( upCast(_context)); _binTree->loadDB(); } return true; }
bool GroupBy::init() { Tokenizer groupColsTokens; groupColsTokens.tokenize(upCast(_context)->getGroupCols(), ','); int numElems = groupColsTokens.getNumValidElems(); for (int i=0; i < numElems; i++) { //if the item is a range, such as 3-5, //must split that as well. const QuickString &elem = groupColsTokens.getElem(i); if (strchr(elem.c_str(), '-')) { Tokenizer rangeElems; rangeElems.tokenize(elem, '-'); int startNum = str2chrPos(rangeElems.getElem(0)); int endNum = str2chrPos(rangeElems.getElem(1)); for (int i=startNum; i <= endNum; i++) { _groupCols.push_back(i); } } else { _groupCols.push_back(str2chrPos(elem)); } } _queryFRM = _context->getFile(0); _prevFields.resize(_groupCols.size()); _prevRecord = getNextRecord(); return true; }
// Sum the number of bases in which the hits overlap the key (query)
// record, while recording per-interval sizes in _qsizes and updating
// the overlap / intersection counters used by the Fisher statistic.
unsigned long Fisher::getTotalIntersection(RecordKeyVector &recList) {
    unsigned long totalBases = 0;
    Record *keyRec = recList.getKey();
    CHRPOS keyStart = keyRec->getStartPos();
    CHRPOS keyEnd = keyRec->getEndPos();
    _overlapCounts += recList.size();
    // Record the query interval length. Note that the int cast
    // truncates to a max size of 2.1GB.
    _qsizes.push_back((int)(keyEnd - keyStart));
    int hitIdx = 0;
    for (RecordKeyVector::iterator_type iter = recList.begin(); iter != recList.end(); iter = recList.next()) {
        CHRPOS overlapStart = max((*iter)->getStartPos(), keyStart);
        CHRPOS overlapEnd = min((*iter)->getEndPos(), keyEnd);
        // Record each individual overlap's size as well.
        _qsizes.push_back((int)(overlapEnd - overlapStart));
        if (_context->getObeySplits()) {
            // Split (blocked) records: the block manager already knows
            // the per-hit overlapping base count.
            totalBases += upCast(_context)->getSplitBlockInfo()->getOverlapBases(hitIdx);
            hitIdx++;
        } else {
            totalBases += (unsigned long)(overlapEnd - overlapStart);
        }
    }
    _numIntersections += (int)recList.size();
    return totalBases;
}
// Emit the end-of-run summary. Only the histogram (-hist) mode produces
// one: an "all" line per observed depth with the depth, the number of
// bases at that depth, the total query length, and the fraction of
// bases at that depth.
void CoverageFile::giveFinalReport(RecordOutputMgr *outputMgr) {
    if (upCast(_context)->getCoverageType() != ContextCoverage::HIST) {
        return;
    }
    for (depthMapType::iterator mapIter = _finalDepthMap.begin(); mapIter != _finalDepthMap.end(); mapIter++) {
        size_t depthVal = mapIter->first;
        size_t basesAtThisDepth = mapIter->second;
        float fractionAtDepth = (float)basesAtThisDepth / (float)_totalQueryLen;
        // Build the tab-separated summary line in _finalOutput.
        _finalOutput = "all\t";
        _finalOutput.append(depthVal);
        _finalOutput.append("\t");
        _finalOutput.append(basesAtThisDepth);
        _finalOutput.append("\t");
        _finalOutput.append(_totalQueryLen);
        _finalOutput.append("\t");
        // format() appends the percentage to _finalOutput.
        format(fractionAtDepth);
        outputMgr->printRecord(NULL, _finalOutput);
    }
}
// Handle one query record's hits: accumulate per-base depth, then
// dispatch to the reporting routine for the requested coverage mode.
void CoverageFile::processHits(RecordOutputMgr *outputMgr, RecordKeyVector &hits) {
    makeDepthCount(hits);
    _finalOutput.clear();
    switch (upCast(_context)->getCoverageType()) {
        case ContextCoverage::COUNT:
            doCounts(outputMgr, hits);
            break;
        case ContextCoverage::PER_BASE:
            doPerBase(outputMgr, hits);
            break;
        case ContextCoverage::MEAN:
            doMean(outputMgr, hits);
            break;
        case ContextCoverage::HIST:
            doHist(outputMgr, hits);
            break;
        case ContextCoverage::DEFAULT:
        default:
            // Anything unrecognized falls back to the default report.
            doDefault(outputMgr, hits);
            break;
    }
}
// IntersectFile constructor: hands the context up to ToolBase and
// null-initializes the sweep / bin-tree / query-file members; the
// actual structures are allocated later in init().
IntersectFile::IntersectFile(ContextIntersect *context) : ToolBase(upCast(context)), _sweep(NULL), _binTree(NULL), _queryFRM(NULL) { }
// When -split is in effect, replace the raw hits with overlaps computed
// block-by-block (e.g. BED12 blocks / spliced BAM alignments).
void IntersectFile::checkSplits(RecordKeyVector &hitSet) {
    if (!upCast(_context)->getObeySplits()) {
        return;
    }
    RecordKeyVector keySet(hitSet.getKey());
    RecordKeyVector resultSet(hitSet.getKey());
    RecordKeyVector overlapSet(hitSet.getKey());
    upCast(_context)->getSplitBlockInfo()->findBlockedOverlaps(keySet, hitSet, resultSet, overlapSet);
    // When using coverage, we need a list of the sub-intervals of
    // coverage so that per-base depth can be properly calculated when
    // obeying splits; every other tool wants the blocked hit records.
    if (_context->getProgram() == ContextBase::COVERAGE) {
        hitSet.swap(overlapSet);
    } else {
        hitSet.swap(resultSet);
    }
}
// Fetch the next query record and its hits. Sorted input is served by
// the chrom sweep, unsorted by the bin tree; hits are post-processed
// for -split before being returned.
bool IntersectFile::findNext(RecordKeyVector &hits) {
    bool found = upCast(_context)->getSortedInput()
        ? nextSortedFind(hits)
        : nextUnsortedFind(hits);
    if (found) {
        checkSplits(hits);
    }
    return found;
}
bool GroupBy::canGroup(const Record *newRecord) { for (int i=0; i < (int)_groupCols.size(); i++) { int fieldNum = _groupCols[i]; const QuickString &newField = newRecord->getField(fieldNum); const QuickString &oldField = _prevFields[i]; if (upCast(_context)->ignoreCase()) { if (oldField.stricmp(newField)) return false; } else { if (oldField != newField) return false; } } return true; }
// Print one group's result: either the full record plus the column-op
// value (-full), or just the grouped columns followed by the value.
void GroupBy::processHits(RecordOutputMgr *outputMgr, RecordKeyVector &hits) {
    const Record *keyRec = hits.getKey();
    const QuickString &opResult = _context->getColumnOpsVal(hits);
    if (upCast(_context)->printFullCols()) {
        outputMgr->printRecord(keyRec, opResult);
        return;
    }
    // Default output: tab-separated grouped columns, then the op value.
    QuickString lineBuf;
    for (size_t idx = 0; idx < _groupCols.size(); idx++) {
        lineBuf.append(keyRec->getField(_groupCols[idx]));
        lineBuf.append('\t');
    }
    lineBuf.append(opResult);
    outputMgr->printRecord(NULL, lineBuf);
}
// Sum the number of bases in which the hits overlap the key (query)
// record, and count the intersections for the Jaccard statistic.
unsigned long Jaccard::getTotalIntersection(RecordKeyVector &hits) {
    unsigned long totalBases = 0;
    Record *keyRec = hits.getKey();
    CHRPOS keyStart = keyRec->getStartPos();
    CHRPOS keyEnd = keyRec->getEndPos();
    int hitIdx = 0;
    for (RecordKeyVector::iterator_type iter = hits.begin(); iter != hits.end(); iter = hits.next()) {
        Record *hitRec = *iter;
        CHRPOS overlapStart = max(hitRec->getStartPos(), keyStart);
        CHRPOS overlapEnd = min(hitRec->getEndPos(), keyEnd);
        if (_context->getObeySplits()) {
            // Split (blocked) records: the block manager already knows
            // the per-hit overlapping base count.
            totalBases += upCast(_context)->getSplitBlockInfo()->getOverlapBases(hitIdx);
            hitIdx++;
        } else {
            totalBases += (unsigned long)(overlapEnd - overlapStart);
        }
    }
    _numIntersections += (int)hits.size();
    return totalBases;
}
// Return the active span corresponding to this one: the successor of
// the previous span when one exists, otherwise derived by walking
// forward then back from this span. The assert documents that a live
// (non-deleted) span resolves to itself.
SkOpSpanBase* SkOpSpanBase::active() {
    SkOpSpanBase* result;
    if (fPrev) {
        result = fPrev->next();
    } else {
        result = upCast()->next()->prev();
    }
    SkASSERT(this == result || fDebugDeleted);
    return result;
}
// Allocate the chrom-sweep object used to stream sorted input
// (see init(), which calls this for the sorted-input path).
void IntersectFile::makeSweep() { _sweep = new NewChromSweep(upCast(_context)); }
// Cache file 0 as a FileRecordMergeMgr. NOTE(review): the static_cast
// assumes the context created this file with a merge manager — verify
// against the ContextComplement setup.
bool ComplementFile::init() { _frm = static_cast<FileRecordMergeMgr *>(upCast(_context)->getFile(0)); return true; }
// Remove from the query (key) record the portions covered by the hit
// records, honoring -A (removeAll), -N (removeSum) and -f
// (subtractFraction). On return, "hits" holds the surviving
// sub-intervals of the query (or the query itself when nothing was
// subtracted); _dontReport is set when the record must be suppressed.
void SubtractFile::subtractHits(RecordKeyVector &hits) {
    if (hits.empty()) {
        // no intersection, nothing to subtract.
        // just copy key to hits as if it were a
        // self-intersection. This is just for reporting purposes.
        hits.push_back(hits.getKey());
        return;
    }
    if (upCast(_context)->getRemoveAll() && upCast(_context)->getSubtractFraction() == 0.0) {
        // hits aren't empty, meaning there is intersection,
        // so we want to not report the hit.
        _dontReport = true;
        return;
    }

    // loop through hits. Track which bases in query were covered
    Record *keyRec = hits.getKey();
    int keyStart = keyRec->getStartPos();
    int keyEnd = keyRec->getEndPos();

    // this vector of bools will represent the bases of the query.
    // for each base, true means uncovered, false means covered.
    // they begin as all uncovered.
    vector<bool> keyBases(keyEnd - keyStart, true);

    // now loop through the hits, and cover corresponding query bases
    // by setting them to false.
    bool basesRemoved = false;
    for (RecordKeyVector::iterator_type iter = hits.begin(); iter != hits.end(); iter = hits.next()) {
        Record *hitRec = *iter;
        int hitStart = hitRec->getStartPos();
        int hitEnd = hitRec->getEndPos();

        int startIdx = max(keyStart, hitStart) - keyStart;
        int endIdx = min(keyEnd, hitEnd) - keyStart;

        int keyLen = keyEnd - keyStart;
        int coveredLen = endIdx - startIdx;
        float coveragePct = (float)coveredLen / (float)keyLen;

        // for each base in the hit, set the base in the query to false.
        // this effectively "erases" the covered bits. Only do so when
        // using -N (removeSum), or when this hit covers at least the
        // required overlap fraction.
        // (Fix: the comment fragment "Only do" had been garbled into
        // the code in the original text.)
        if (upCast(_context)->getRemoveSum() || coveragePct >= upCast(_context)->getSubtractFraction()) {
            std::fill(keyBases.begin() + startIdx, keyBases.begin() + endIdx, false);
            basesRemoved = true;
        }
    }

    if (!basesRemoved) {
        // treat as if there were no intersection
        hits.clearVector();
        hits.push_back(hits.getKey());
        return;
    } else if (upCast(_context)->getRemoveAll()) {
        _dontReport = true;
        return;
    }

    // if the -N option is used (removeSum), do not report if the
    // percentage of uniquely covered bases exceeds the overlap fraction.
    if (upCast(_context)->getRemoveSum()) {
        // determine how many bases are left uncovered.
        int numBasesUncovered = std::accumulate(keyBases.begin(), keyBases.end(), 0);
        // determine percentage that are covered.
        float pctCovered = 1.0 - (float)numBasesUncovered / (float)(keyEnd - keyStart);
        if (pctCovered > upCast(_context)->getSubtractFraction()) {
            _dontReport = true;
        } else {
            hits.clearVector();
            hits.push_back(hits.getKey());
        }
        return;
    }

    // now make "blocks" out of the query's remaining stretches of
    // uncovered bases.
    hits.clearVector();
    for (int i = 0; i < (int)keyBases.size(); i++) {
        if (keyBases[i] == true) {
            int blockStart = keyStart + i;
            // BUGFIX: bounds-check i BEFORE reading keyBases[i]; the
            // original tested keyBases[i] first, reading one element
            // past the end of the vector on the final run.
            while (i < (int)keyBases.size() && keyBases[i] == true) {
                i++;
            }
            int blockEnd = min(keyStart + i, keyEnd);
            hits.push_back(_tmpBlocksMgr->allocateAndAssignRecord(keyRec, blockStart, blockEnd));
        }
    }
    _deleteTmpBlocks = true;
}
// Create an Array of the given dimensions whose elements all hold the
// scalar value "val", represented lazily as a JIT scalar node.
Array<T>* createScalarNode(const dim4 &size, const T val)
{
    // Fix: construct the node directly instead of the original's
    // redundant NULL-initialize-then-assign.
    JIT::ScalarNode *node = new JIT::ScalarNode(upCast(val), isDouble<T>());
    // Ownership of the node passes to the created node array.
    return createNodeArray<T>(size, reinterpret_cast<JIT::Node *>(node));
}