/**
 * Read a set of intervals from a text file, one interval per line.
 *
 * Each line obtained from the line reader is handled as follows:
 *   - empty lines are skipped;
 *   - lines whose first character is not NO_EXCLUDE_CHAR and is contained in
 *     excludeLinesStartingWith are skipped;
 *   - every other line is wrapped in a stringstream, parsed by the Interval
 *     constructor, and inserted into the result set.
 *
 * @param intervalsFile             path handed to getIstreamLineReaderFromFile()
 * @param excludeLinesStartingWith  first characters that mark a line to be ignored
 * @return a newly allocated IntervalSet; this function does not free it
 * @throws Exception* (thrown with `new`, as elsewhere in this function) when
 *         the line reader cannot be obtained; any exception raised while
 *         reading or parsing propagates after the local allocations are released
 */
XHMM::xhmmInputManager::IntervalSet* XHMM::xhmmInputManager::readIntervalsFromFile(string intervalsFile, const set<char>& excludeLinesStartingWith) {
	HMM_PP::istreamLineReader* intervalsStream = HMM_PP::utils::getIstreamLineReaderFromFile(intervalsFile);
	if (intervalsStream == NULL)
		throw new Exception("Unable to read table from file '" + intervalsFile + "'");

	IntervalSet* intervSet = NULL;
	try {
		intervSet = new IntervalSet();

		while (!intervalsStream->eof()) {
			// A fresh string per iteration, exactly as before, but on the
			// stack so it cannot leak when parsing throws.
			string line;
			*intervalsStream >> line;
			if (line.empty())
				continue;

			const char firstChar = line[0];
			if (firstChar != NO_EXCLUDE_CHAR && excludeLinesStartingWith.find(firstChar) != excludeLinesStartingWith.end())
				continue;

			stringstream lineStream(line);
			intervSet->insert(Interval(lineStream));
		}
	}
	catch (...) {
		// Release everything allocated here before letting the error escape.
		delete intervSet;
		delete intervalsStream;
		throw;
	}

	delete intervalsStream;
	return intervSet;
}
// Initialize intervals, one for each unique attribute value void initialize_intervals(int dimIndex) { g_intervals.clear(); IntervalSet interval; TupleVec::iterator it = g_data.begin(), end = g_data.end(), next; int index = 0; for ( ; it != end; ++it, ++index) { next = it+1; // Add element (index) to interval interval.insert(index); bool insertInterval = (next == end) || ( (next->first)[dimIndex] != (it->first)[dimIndex] ); if (insertInterval) { // Insert interval into list if // (a) end of sequence or // (b) next element is not the same as current element g_intervals.push_back(interval); interval.clear(); } } // Debug print_all_intervals(); }
/**
 * Load a read-depth matrix from file and drop excluded samples (rows) and
 * targets (columns).
 *
 * A sample row is excluded when its row name is found in excludeSamples.
 * A target column is excluded when any of the following holds:
 *   - the Interval built from its column name is found in excludeTargets;
 *   - the interval's chromosome (getChr()) is found in excludeTargetChromosomes;
 *   - the interval's span() is below minTargetSize or above maxTargetSize.
 * Every exclusion is reported on cerr. When at least one row or column is
 * excluded, the matrix is replaced by the result of deleteRowsAndColumns()
 * and the originally loaded matrix is deleted.
 *
 * @param readDepthFile             file passed to MatrixReader<double>::readMatrixFromFile()
 * @param excludeTargets            intervals to exclude; may be NULL
 * @param excludeTargetChromosomes  chromosomes to exclude; may be NULL
 * @param excludeSamples            sample names to exclude; may be NULL
 * @param minTargetSize             smallest target span that is kept
 * @param maxTargetSize             largest target span that is kept
 * @return LoadedReadDepths built from the (possibly reduced) matrix, the
 *         newly allocated set of excluded targets and the newly allocated
 *         set of excluded samples; none of these is freed here
 *
 * If anything throws while the exclusions are being computed or applied,
 * the matrix and both result sets are released before the exception
 * propagates.
 */
XHMM::xhmmInputManager::LoadedReadDepths XHMM::xhmmInputManager::loadReadDepthsFromFile(string readDepthFile, IntervalSet* excludeTargets, StringSet* excludeTargetChromosomes, StringSet* excludeSamples, const ullint minTargetSize, const ullint maxTargetSize) {
	HMM_PP::DoubleMat* rdMat = MatrixReader<double>::readMatrixFromFile(readDepthFile);

	StringSet* excludedSamples = NULL;
	IntervalSet* excludedTargets = NULL;

	try {
		excludedSamples = new StringSet();
		excludedTargets = new IntervalSet();

		// Scratch index sets only live for the duration of this call, so
		// they are kept on the stack and handed over by address.
		set<ullint> excludeSampleIndices;
		set<ullint> excludeTargetIndices;

		if (excludeSamples != NULL) {
			for (ullint row = 0; row < rdMat->nrow(); ++row) {
				string samp = rdMat->rowName(row);
				if (excludeSamples->find(samp) != excludeSamples->end()) {
					cerr << "Excluded sample " << samp << endl;
					excludedSamples->insert(samp);
					excludeSampleIndices.insert(row);
				}
			}
		}

		// Skip the per-column scan entirely when no target filter is active.
		if (excludeTargets != NULL || excludeTargetChromosomes != NULL || minTargetSize > 0 || maxTargetSize < ULLINT_INFINITY) {
			for (ullint j = 0; j < rdMat->ncol(); ++j) {
				const Interval targJ(rdMat->colName(j));
				const ullint targLen = targJ.span();
				const bool targLenFails = (targLen < minTargetSize || targLen > maxTargetSize);

				if ((excludeTargets != NULL && excludeTargets->find(targJ) != excludeTargets->end())
						|| (excludeTargetChromosomes != NULL && excludeTargetChromosomes->find(targJ.getChr()) != excludeTargetChromosomes->end())
						|| targLenFails) {
					cerr << "Excluded target " << targJ;
					if (targLenFails)
						cerr << " of length " << targLen;
					cerr << endl;

					excludeTargetIndices.insert(j);
					excludedTargets->insert(targJ);
				}
			}
		}

		if (!excludeSampleIndices.empty() || !excludeTargetIndices.empty()) {
			HMM_PP::DoubleMat* newRdMat = rdMat->deleteRowsAndColumns(&excludeSampleIndices, &excludeTargetIndices);
			delete rdMat;
			rdMat = newRdMat;
		}
	}
	catch (...) {
		// Nothing has been handed to the caller yet; free it all and rethrow.
		delete excludedTargets;
		delete excludedSamples;
		delete rdMat;
		throw;
	}

	return LoadedReadDepths(rdMat, excludedTargets, excludedSamples);
}
int main() { IntervalSet s; s.insert(Interval<size_t>(1,10)); s.insert(Interval<size_t>(10,20)); s.insert(Interval<size_t>(20,30)); s.insert(Interval<size_t>(30,40)); print_set(s); printf("Matching intervals:\n"); std::pair<IntervalSet::const_iterator,IntervalSet::const_iterator> ir = intersecting_intervals(s, Interval<size_t>(10,25)); for(IntervalSet::const_iterator i=ir.first; i!=ir.second; ++i) { printf("%i-%i ", (int)i->start, (int)i->end); } printf("\n"); intervalset_merge(s, Interval<size_t>(25, 50)); print_set(s); }
int main() { using namespace ch10; using namespace std; IntervalSet intervals; intervals.insert(1); intervals.insert(100); intervals.insert(200); intervals.insert(300); intervals.insert(400); intervals.insert(500); intervals.insert(600); intervals.insert(700); intervals.insert(800); intervals.insert(900); intervals.insert(1000); Histogram hist(intervals); cout << "Uniform distribution:" << endl; UniformRandomGenerator gen1(1, 1000); for(int i = 0; i < 100; i++) { hist.add(gen1.draw()); } hist.print_formatted(cout); cout << "Exponential distribution:" << endl; ExponentialRandomGenerator gen2(1,200); hist = Histogram(intervals); for(int i = 0; i < 100; i++) { int result = gen2.draw(); if(result >= 1 && result < 1000) hist.add(result); } hist.print_formatted(cout); cout << "Normal distribution:" << endl; NormalRandomGenerator gen3(500, 200); hist = Histogram(intervals); for(int i = 0; i < 100; i++) { int result = gen3.draw(); if(result >= 1 && result < 1000) hist.add(result); } hist.print_formatted(cout); }
int main() { using namespace ch10; using namespace std; try { IntervalSet intervals; intervals.insert(1); intervals.insert(10); intervals.insert(20); intervals.insert(30); intervals.insert(40); intervals.insert(50); intervals.insert(60); intervals.insert(70); intervals.insert(80); intervals.insert(90); intervals.insert(100); Histogram hist(intervals); cout << "Histogram output with fixed numbers:" << endl; hist.add(1).add(10).add(11).add(19).add(20).add(11).add(89).add(89).add(89).add(90).add(95).add(100); hist.print_formatted(std::cout); cout << endl << "Histogram with random numbers:" << endl; hist = Histogram(intervals); srand(clock()); for(int i = 0; i < 100; i++) { hist.add(rand() % 100 + 1); } hist.print_formatted(std::cout); } catch(Histogram::OutOfRangeError) { cerr << "The value was out of range." << endl; } catch(Histogram::InvalidIntervalSetError) { cerr << "Invalid interval." << endl; } }