double FONSEModel::calculateLogLikelihoodRatioPerAA(Gene& gene, std::string grouping, double *mutation, double *selection, double phiValue)
{
	int numCodons = SequenceSummary::GetNumCodonsForAA(grouping);
	double logLikelihood = 0.0;

	std::vector <unsigned> *positions;
	double codonProb[6];

	unsigned maxIndexVal = 0u;
	for (int i = 1; i < (numCodons - 1); i++)
	{
		if (selection[maxIndexVal] < selection[i])
		{
			maxIndexVal = i;
		}
	}

	unsigned aaStart, aaEnd;
	SequenceSummary::AAToCodonRange(grouping, aaStart, aaEnd, false);
	for (unsigned i = aaStart, k = 0; i < aaEnd; i++, k++) {
		positions = gene.geneData.getCodonPositions(i);
		for (unsigned j = 0; j < positions->size(); j++) {
			calculateCodonProbabilityVector(numCodons, positions->at(j), maxIndexVal, mutation, selection, phiValue, codonProb);
			if (codonProb[k] == 0) continue;
			logLikelihood += std::log(codonProb[k]);
		}
		//positions->clear();
	}

	return logLikelihood;
}
Beispiel #2
0
void FONSEModel::simulateGenome(Genome & genome)
{
	unsigned codonIndex;
	std::string curAA;

	std::string tmpDesc = "Simulated Gene";

	for (unsigned geneIndex = 0; geneIndex < genome.getGenomeSize(); geneIndex++) //loop over all genes in the genome
	{
		if (geneIndex % 100 == 0) my_print("Simulating Gene %\n", geneIndex);
		Gene gene = genome.getGene(geneIndex);
		SequenceSummary sequenceSummary = gene.geneData;
		std::string tmpSeq = "ATG"; //Always will have the start amino acid


		unsigned mixtureElement = getMixtureAssignment(geneIndex);
		unsigned mutationCategory = getMutationCategory(mixtureElement);
		unsigned selectionCategory = getSelectionCategory(mixtureElement);
		unsigned synthesisRateCategory = getSynthesisRateCategory(mixtureElement);
		double phi = getSynthesisRate(geneIndex, synthesisRateCategory, false);

		std::string geneSeq = gene.getSequence();
		for (unsigned position = 1; position < (geneSeq.size() / 3); position++)
		{
			std::string codon = geneSeq.substr((position * 3), 3);
			curAA = SequenceSummary::codonToAA(codon);

			//TODO: Throw an error here instead
			if (curAA == "X") {
				if (position < (geneSeq.size() / 3) - 1) my_print("Warning: Internal stop codon found in gene % at position %. Ignoring and moving on.\n", gene.getId(), position);
				continue;
			}

			unsigned numCodons = SequenceSummary::GetNumCodonsForAA(curAA);

			double* codonProb = new double[numCodons](); //size the arrays to the proper size based on # of codons.
			double* mutation = new double[numCodons - 1]();
			double* selection = new double[numCodons - 1]();


			if (curAA == "M" || curAA == "W")
			{
				codonProb[0] = 1;
			}
			else
			{
				getParameterForCategory(mutationCategory, FONSEParameter::dM, curAA, false, mutation);
				getParameterForCategory(selectionCategory, FONSEParameter::dOmega, curAA, false, selection);
				calculateCodonProbabilityVector(numCodons, position, mutation, selection, phi, codonProb);
			}


			codonIndex = Parameter::randMultinom(codonProb, numCodons);
			unsigned aaStart, aaEnd;
			SequenceSummary::AAToCodonRange(curAA, aaStart, aaEnd, false);  //need the first spot in the array where the codons for curAA are
			codon = sequenceSummary.indexToCodon(aaStart + codonIndex);//get the correct codon based off codonIndex
			tmpSeq += codon;
		}
		std::string codon = sequenceSummary.indexToCodon((unsigned)Parameter::randUnif(61.0, 64.0)); //randomly choose a stop codon, from range 61-63
		tmpSeq += codon;
		Gene simulatedGene(tmpSeq, tmpDesc, gene.getId());
		genome.addGene(simulatedGene, true);
	}
}