/*
 * Reads a matrix of doubles from readDepthFile and removes the requested rows and columns.
 *
 * Row names are treated as sample names; column names are parsed as Interval targets.
 *
 * A row is removed when excludeSamples is non-NULL and contains its name.
 * A column is removed when any of the following holds:
 *   - excludeTargets is non-NULL and contains the column's interval;
 *   - excludeTargetChromosomes is non-NULL and contains the interval's chromosome;
 *   - the interval's span is below minTargetSize or above maxTargetSize.
 *
 * Every removal is reported on cerr.
 *
 * Returns a LoadedReadDepths built from the (possibly filtered) matrix, the set of removed
 * targets and the set of removed samples. All three are heap-allocated here and handed to
 * LoadedReadDepths (presumably it, or the caller, takes ownership -- confirm).
 */
XHMM::xhmmInputManager::LoadedReadDepths XHMM::xhmmInputManager::loadReadDepthsFromFile(string readDepthFile, IntervalSet* excludeTargets, StringSet* excludeTargetChromosomes, StringSet* excludeSamples, const ullint minTargetSize, const ullint maxTargetSize) {
	HMM_PP::DoubleMat* depths = MatrixReader<double>::readMatrixFromFile(readDepthFile);

	StringSet* droppedSamples = new StringSet();
	IntervalSet* droppedTargets = new IntervalSet();

	// Row indices of the samples to strip from the matrix:
	set<ullint> rowsToDrop;
	if (excludeSamples != NULL) {
		for (ullint r = 0; r < depths->nrow(); ++r) {
			string sampleName = depths->rowName(r);
			if (excludeSamples->find(sampleName) == excludeSamples->end())
				continue;

			cerr << "Excluded sample " << sampleName << endl;
			droppedSamples->insert(sampleName);
			rowsToDrop.insert(r);
		}
	}

	// Column indices of the targets to strip from the matrix; the scan is skipped
	// entirely when no target-level filter is active:
	set<ullint> colsToDrop;
	const bool anyTargetFilter =
			excludeTargets != NULL
			|| excludeTargetChromosomes != NULL
			|| minTargetSize > 0
			|| maxTargetSize < ULLINT_INFINITY;

	if (anyTargetFilter) {
		for (ullint c = 0; c < depths->ncol(); ++c) {
			const Interval target(depths->colName(c));
			const ullint span = target.span();
			const bool badLength = (span < minTargetSize || span > maxTargetSize);

			// Each lookup is only performed if the previous criterion did not already match:
			const bool listedTarget =
					excludeTargets != NULL
					&& excludeTargets->find(target) != excludeTargets->end();
			const bool listedChromosome =
					!listedTarget
					&& excludeTargetChromosomes != NULL
					&& excludeTargetChromosomes->find(target.getChr()) != excludeTargetChromosomes->end();

			if (!(listedTarget || listedChromosome || badLength))
				continue;

			cerr << "Excluded target " << target;
			if (badLength)
				cerr << " of length " << span;
			cerr << endl;

			colsToDrop.insert(c);
			droppedTargets->insert(target);
		}
	}

	// Only rebuild the matrix when there is actually something to remove:
	if (!rowsToDrop.empty() || !colsToDrop.empty()) {
		HMM_PP::DoubleMat* filtered = depths->deleteRowsAndColumns(&rowsToDrop, &colsToDrop);
		delete depths;
		depths = filtered;
	}

	return LoadedReadDepths(depths, droppedTargets, droppedSamples);
}
void XHMM::ReadDepthMatrixLoader::readTargets() { stringstream* lineStream = new stringstream(*_header); delete _header; _header = NULL; Interval* prevTarget = NULL; while (*lineStream && !lineStream->eof()) { string targetString; *lineStream >> targetString; if (!*lineStream) throw new Exception("Data input stream failed while reading target " + targetString); Interval* curTarget = new Interval(targetString); if (_chrStopInds->find(curTarget->getChr()) != _chrStopInds->end()) throw new Exception("MUST provide targets in order and GROUPED BY CHROMOSOME: target " + curTarget->intervalString() + " reverts to chromosome " + curTarget->getChr() + ", which was interrupted by targets in other chromosomes"); _targets->push_back(*curTarget); if (prevTarget == NULL || curTarget->getChr() != prevTarget->getChr()) // curTarget is a new chromosome (*_chrStartInds)[curTarget->getChr()] = _targets->size() - 1; if (prevTarget != NULL) { if (curTarget->getChr() != prevTarget->getChr()) // switched to a new chromosome, so can never go back to prevTarget->getChr() (*_chrStopInds)[prevTarget->getChr()] = _targets->size() - 2; // subtract 2 (and not 1) since already added curTarget else if (!(curTarget->getBp1() > prevTarget->getBp2())) throw new Exception("MUST provide NON-OVERLAPPING targets IN ORDER: target " + curTarget->intervalString() + " either overlaps with or precedes previous target " + prevTarget->intervalString()); delete prevTarget; } prevTarget = curTarget; } if (prevTarget != NULL) { (*_chrStopInds)[prevTarget->getChr()] = _targets->size() - 1; // since now prevTarget points to the LAST target, which ends its chromosome delete prevTarget; } for (map<string, uint>::const_iterator stopIt = _chrStopInds->begin(); stopIt != _chrStopInds->end(); ++stopIt) _listAllChrStopInds->insert(stopIt->second); delete lineStream; }