Exemplo n.º 1
0
/*
 * Reads the read-depth matrix from readDepthFile and removes the requested samples (rows) and targets (columns).
 *
 * A sample (row) is dropped when its row name is found in excludeSamples.
 * A target (column) is dropped when at least one of the following holds:
 *   - it is found in excludeTargets
 *   - its chromosome is found in excludeTargetChromosomes
 *   - its span is smaller than minTargetSize or larger than maxTargetSize
 * Each of the three exclusion sets may be NULL, which disables that kind of exclusion.
 * Every exclusion is reported on cerr.
 *
 * Returns a LoadedReadDepths constructed from the (possibly filtered) matrix, the set of targets that
 * were actually excluded, and the set of samples that were actually excluded. The two sets are allocated
 * here with new. NOTE(review): presumably LoadedReadDepths / the caller takes ownership of all three
 * pointers -- confirm against the LoadedReadDepths definition.
 *
 * If filtering throws, the matrix and the two result sets are released before the exception is propagated.
 */
XHMM::xhmmInputManager::LoadedReadDepths
XHMM::xhmmInputManager::loadReadDepthsFromFile(string readDepthFile, IntervalSet* excludeTargets, StringSet* excludeTargetChromosomes, StringSet* excludeSamples,
		const ullint minTargetSize, const ullint maxTargetSize) {

	HMM_PP::DoubleMat* rdMat = MatrixReader<double>::readMatrixFromFile(readDepthFile);
	StringSet* excludedSamples = NULL;
	IntervalSet* excludedTargets = NULL;

	try {
		excludedSamples = new StringSet();
		excludedTargets = new IntervalSet();

		// Row indices to drop; an automatic object, so it is released on every exit path
		set<ullint> excludeSampleIndices;
		if (excludeSamples != NULL) {
			for (ullint row = 0; row < rdMat->nrow(); ++row) {
				string samp = rdMat->rowName(row);

				if (excludeSamples->find(samp) != excludeSamples->end()) {
					cerr << "Excluded sample " << samp << endl;

					excludedSamples->insert(samp);
					excludeSampleIndices.insert(row);
				}
			}
		}

		// Column indices to drop; the scan is skipped entirely when no target filter is active
		set<ullint> excludeTargetIndices;
		if (excludeTargets != NULL || excludeTargetChromosomes != NULL || minTargetSize > 0 || maxTargetSize < ULLINT_INFINITY) {
			for (ullint j = 0; j < rdMat->ncol(); ++j) {
				const Interval targJ(rdMat->colName(j));
				const ullint targLen = targJ.span();
				bool targLenFails = (targLen < minTargetSize || targLen > maxTargetSize);

				if ((excludeTargets != NULL && excludeTargets->find(targJ) != excludeTargets->end()) ||
						(excludeTargetChromosomes != NULL && excludeTargetChromosomes->find(targJ.getChr()) != excludeTargetChromosomes->end()) ||
						targLenFails) {
					cerr << "Excluded target " << targJ;
					if (targLenFails)
						cerr << " of length " << targLen;
					cerr << endl;

					excludeTargetIndices.insert(j);
					excludedTargets->insert(targJ);
				}
			}
		}

		// Only build a reduced matrix when there is actually something to remove
		if (!excludeSampleIndices.empty() || !excludeTargetIndices.empty()) {
			HMM_PP::DoubleMat* newRdMat = rdMat->deleteRowsAndColumns(&excludeSampleIndices, &excludeTargetIndices);
			delete rdMat;
			rdMat = newRdMat;
		}
	}
	catch (...) {
		// Nothing has been handed to the caller yet, so release everything allocated so far
		delete rdMat;
		delete excludedTargets;
		delete excludedSamples;
		throw;
	}

	return LoadedReadDepths(rdMat, excludedTargets, excludedSamples);
}
Exemplo n.º 2
0
/*
 * Parses the whitespace-separated target strings stored in *_header and records them.
 *
 * For each target, in order:
 *   - an Interval is built from the string and appended to *_targets
 *   - (*_chrStartInds)[chr] is set to the index (in *_targets) of the first target of that chromosome
 *   - (*_chrStopInds)[chr] is set to the index of the last target of that chromosome
 * Afterwards every stop index is also inserted into *_listAllChrStopInds.
 * _header is deleted and set to NULL once its contents have been copied into the parsing stream.
 *
 * Throws (as Exception*) when:
 *   - a target string cannot be extracted from the header (this includes an empty header)
 *   - a chromosome re-appears after targets of another chromosome (targets must be grouped by chromosome)
 *   - a target does not start strictly after the end of the previous target on the same chromosome
 *
 * Whitespace after the last target (e.g., a trailing tab or '\r') is ignored and does not trigger
 * the extraction failure. All temporaries are released on the exception paths as well.
 */
void XHMM::ReadDepthMatrixLoader::readTargets() {
	// Automatic storage: the stream is destroyed on every exit path, including the throws below
	stringstream lineStream(*_header);
	delete _header;
	_header = NULL;

	Interval* prevTarget = NULL;
	Interval* curTarget = NULL; // non-NULL only while the current target has not yet become prevTarget
	try {
		while (lineStream && !lineStream.eof()) {
			string targetString;
			lineStream >> targetString;
			if (!lineStream)
				throw new Exception("Data input stream failed while reading target " + targetString);

			curTarget = new Interval(targetString);
			// A chromosome that already has a stop index was closed earlier, so it must not appear again
			if (_chrStopInds->find(curTarget->getChr()) != _chrStopInds->end())
				throw new Exception("MUST provide targets in order and GROUPED BY CHROMOSOME: target " + curTarget->intervalString() + " reverts to chromosome " + curTarget->getChr() + ", which was interrupted by targets in other chromosomes");
			_targets->push_back(*curTarget);

			if (prevTarget == NULL || curTarget->getChr() != prevTarget->getChr()) // curTarget is a new chromosome
				(*_chrStartInds)[curTarget->getChr()] = _targets->size() - 1;

			if (prevTarget != NULL) {
				if (curTarget->getChr() != prevTarget->getChr()) // switched to a new chromosome, so can never go back to prevTarget->getChr()
					(*_chrStopInds)[prevTarget->getChr()] = _targets->size() - 2; // subtract 2 (and not 1) since already added curTarget
				else if (!(curTarget->getBp1() > prevTarget->getBp2()))
					throw new Exception("MUST provide NON-OVERLAPPING targets IN ORDER: target " + curTarget->intervalString() + " either overlaps with or precedes previous target " + prevTarget->intervalString());

				delete prevTarget;
			}
			prevTarget = curTarget;
			curTarget = NULL; // now tracked (and eventually released) through prevTarget only

			// Consume whitespace following the token so that eof() is detected even when the header
			// ends in whitespace; otherwise the next extraction would fail and be reported as an error
			if (!lineStream.eof())
				lineStream >> std::ws;
		}

		if (prevTarget != NULL)
			(*_chrStopInds)[prevTarget->getChr()] = _targets->size() - 1; // since now prevTarget points to the LAST target, which ends its chromosome
	}
	catch (...) {
		// Release the temporaries before propagating whatever was thrown
		delete curTarget;
		delete prevTarget;
		throw;
	}
	delete prevTarget;

	for (map<string, uint>::const_iterator stopIt = _chrStopInds->begin(); stopIt != _chrStopInds->end(); ++stopIt)
		_listAllChrStopInds->insert(stopIt->second);
}