void FONSEModel::calculateLogLikelihoodRatioPerGroupingPerCategory(std::string grouping, Genome& genome, std::vector<double> &logAcceptanceRatioForAllMixtures) { int numGenes = genome.getGenomeSize(); //int numCodons = SequenceSummary::GetNumCodonsForAA(grouping); double likelihood = 0.0; double likelihood_proposed = 0.0; double mutation[5]; double selection[5]; double mutation_proposed[5]; double selection_proposed[5]; std::string curAA; Gene *gene; SequenceSummary *sequenceSummary; unsigned aaIndex = SequenceSummary::AAToAAIndex(grouping); #ifdef _OPENMP //#ifndef __APPLE__ #pragma omp parallel for private(mutation, selection, mutation_proposed, selection_proposed, curAA, gene, sequenceSummary) reduction(+:likelihood,likelihood_proposed) #endif for (unsigned i = 0u; i < numGenes; i++) { gene = &genome.getGene(i); sequenceSummary = gene->getSequenceSummary(); if (sequenceSummary->getAACountForAA(aaIndex) == 0) continue; // which mixture element does this gene belong to unsigned mixtureElement = parameter->getMixtureAssignment(i); // how is the mixture element defined. 
Which categories make it up unsigned mutationCategory = parameter->getMutationCategory(mixtureElement); unsigned selectionCategory = parameter->getSelectionCategory(mixtureElement); unsigned expressionCategory = parameter->getSynthesisRateCategory(mixtureElement); // get phi value, calculate likelihood conditional on phi double phiValue = parameter->getSynthesisRate(i, expressionCategory, false); // get current mutation and selection parameter parameter->getParameterForCategory(mutationCategory, FONSEParameter::dM, grouping, false, mutation); parameter->getParameterForCategory(selectionCategory, FONSEParameter::dOmega, grouping, false, selection); // get proposed mutation and selection parameter parameter->getParameterForCategory(mutationCategory, FONSEParameter::dM, grouping, true, mutation_proposed); parameter->getParameterForCategory(selectionCategory, FONSEParameter::dOmega, grouping, true, selection_proposed); likelihood += calculateLogLikelihoodRatioPerAA(*gene, grouping, mutation, selection, phiValue); likelihood_proposed += calculateLogLikelihoodRatioPerAA(*gene, grouping, mutation_proposed, selection_proposed, phiValue); } //likelihood_proposed = likelihood_proposed + calculateMutationPrior(grouping, true); //likelihood = likelihood + calculateMutationPrior(grouping, false); logAcceptanceRatioForAllMixtures[0] = (likelihood_proposed - likelihood); }
void PANSEModel::calculateLogLikelihoodRatioPerGroupingPerCategory(std::string grouping, Genome& genome, std::vector<double> &logAcceptanceRatioForAllMixtures) { double logLikelihood = 0.0; double logLikelihood_proposed = 0.0; Gene *gene; unsigned index = SequenceSummary::codonToIndex(grouping); #ifdef _OPENMP //#ifndef __APPLE__ #pragma omp parallel for private(gene) reduction(+:logLikelihood,logLikelihood_proposed) #endif for (unsigned i = 0u; i < genome.getGenomeSize(); i++) { gene = &genome.getGene(i); // which mixture element does this gene belong to unsigned mixtureElement = parameter->getMixtureAssignment(i); // how is the mixture element defined. Which categories make it up unsigned alphaCategory = parameter->getMutationCategory(mixtureElement); unsigned lambdaPrimeCategory = parameter->getSelectionCategory(mixtureElement); unsigned synthesisRateCategory = parameter->getSynthesisRateCategory(mixtureElement); // get non codon specific values, calculate likelihood conditional on these double phiValue = parameter->getSynthesisRate(i, synthesisRateCategory, false); unsigned currRFPObserved = gene->geneData.getRFPValue(index); unsigned currNumCodonsInMRNA = gene->geneData.getCodonCountForCodon(index); if (currNumCodonsInMRNA == 0) continue; double currAlpha = getParameterForCategory(alphaCategory, PANSEParameter::alp, grouping, false); double currLambdaPrime = getParameterForCategory(lambdaPrimeCategory, PANSEParameter::lmPri, grouping, false); double propAlpha = getParameterForCategory(alphaCategory, PANSEParameter::alp, grouping, true); double propLambdaPrime = getParameterForCategory(lambdaPrimeCategory, PANSEParameter::lmPri, grouping, true); logLikelihood += calculateLogLikelihoodPerCodonPerGene(currAlpha, currLambdaPrime, currRFPObserved, currNumCodonsInMRNA, phiValue); logLikelihood_proposed += calculateLogLikelihoodPerCodonPerGene(propAlpha, propLambdaPrime, currRFPObserved, currNumCodonsInMRNA, phiValue); } logAcceptanceRatioForAllMixtures[0] = 
logLikelihood_proposed - logLikelihood; }
void PANSEModel::simulateGenome(Genome &genome) { for (unsigned geneIndex = 0; geneIndex < genome.getGenomeSize(); geneIndex++) { unsigned mixtureElement = getMixtureAssignment(geneIndex); Gene gene = genome.getGene(geneIndex); double phi = parameter->getSynthesisRate(geneIndex, mixtureElement, false); Gene tmpGene = gene; for (unsigned codonIndex = 0; codonIndex < 61; codonIndex++) { std::string codon = SequenceSummary::codonArray[codonIndex]; unsigned alphaCat = parameter->getMutationCategory(mixtureElement); unsigned lambdaPrimeCat = parameter->getSelectionCategory(mixtureElement); double alpha = getParameterForCategory(alphaCat, PANSEParameter::alp, codon, false); double lambdaPrime = getParameterForCategory(lambdaPrimeCat, PANSEParameter::lmPri, codon, false); double alphaPrime = alpha * gene.geneData.getCodonCountForCodon(codon); #ifndef STANDALONE RNGScope scope; NumericVector xx(1); xx = rgamma(1, alphaPrime, 1.0/lambdaPrime); xx = rpois(1, xx[0] * phi); tmpGene.geneData.setRFPValue(codonIndex, xx[0]); #else std::gamma_distribution<double> GDistribution(alphaPrime,1.0/lambdaPrime); double tmp = GDistribution(Parameter::generator); std::poisson_distribution<unsigned> PDistribution(phi * tmp); unsigned simulatedValue = PDistribution(Parameter::generator); tmpGene.geneData.setRFPValue(codonIndex, simulatedValue); #endif } genome.addGene(tmpGene, true); } }
/**
 * Builds a simulated coding sequence for every gene in the genome and adds it
 * to the genome via addGene(..., true).
 *
 * Each simulated sequence starts with "ATG". For every following codon
 * position of the original sequence, the amino acid at that position is kept
 * and one of its codons is drawn with Parameter::randMultinom() from the
 * probabilities produced by calculateCodonProbabilityVector(). Positions whose
 * codon translates to "X" are skipped. A codon with an index drawn from
 * [61, 64) is appended at the end.
 *
 * The per-position work buffers are std::vector instances, so they are
 * released automatically on every path out of the loop body (the previous raw
 * new[] allocations were never freed).
 *
 * @param genome  Genome whose genes are read and to which the simulated genes
 *                are added.
 */
void FONSEModel::simulateGenome(Genome & genome)
{
	std::string tmpDesc = "Simulated Gene";

	for (unsigned geneIndex = 0; geneIndex < genome.getGenomeSize(); geneIndex++) //loop over all genes in the genome
	{
		if (geneIndex % 100 == 0) my_print("Simulating Gene %\n", geneIndex);

		Gene gene = genome.getGene(geneIndex);
		SequenceSummary sequenceSummary = gene.geneData;
		std::string tmpSeq = "ATG"; //Always will have the start amino acid

		unsigned mixtureElement = getMixtureAssignment(geneIndex);
		unsigned mutationCategory = getMutationCategory(mixtureElement);
		unsigned selectionCategory = getSelectionCategory(mixtureElement);
		unsigned synthesisRateCategory = getSynthesisRateCategory(mixtureElement);
		double phi = getSynthesisRate(geneIndex, synthesisRateCategory, false);

		std::string geneSeq = gene.getSequence();
		// Position 0 is the start codon already placed in tmpSeq.
		for (unsigned position = 1; position < (geneSeq.size() / 3); position++)
		{
			std::string codon = geneSeq.substr((position * 3), 3);
			std::string curAA = SequenceSummary::codonToAA(codon);

			//TODO: Throw an error here instead
			if (curAA == "X")
			{
				// Only warn when the "X" codon is not the last codon of the sequence.
				if (position < (geneSeq.size() / 3) - 1)
					my_print("Warning: Internal stop codon found in gene % at position %. Ignoring and moving on.\n", gene.getId(), position);
				continue;
			}

			unsigned numCodons = SequenceSummary::GetNumCodonsForAA(curAA);

			// Zero-initialised buffers sized by the number of codons for curAA;
			// owned by this iteration and freed automatically.
			std::vector<double> codonProb(numCodons, 0.0);
			std::vector<double> mutation(numCodons - 1, 0.0);
			std::vector<double> selection(numCodons - 1, 0.0);

			if (curAA == "M" || curAA == "W")
			{
				// Nothing to choose between: the first codon gets probability 1.
				codonProb[0] = 1;
			}
			else
			{
				getParameterForCategory(mutationCategory, FONSEParameter::dM, curAA, false, mutation.data());
				getParameterForCategory(selectionCategory, FONSEParameter::dOmega, curAA, false, selection.data());
				calculateCodonProbabilityVector(numCodons, position, mutation.data(), selection.data(), phi, codonProb.data());
			}

			unsigned codonIndex = Parameter::randMultinom(codonProb.data(), numCodons);

			unsigned aaStart, aaEnd;
			SequenceSummary::AAToCodonRange(curAA, aaStart, aaEnd, false); //need the first spot in the array where the codons for curAA are
			codon = sequenceSummary.indexToCodon(aaStart + codonIndex); //get the correct codon based off codonIndex
			tmpSeq += codon;
		}

		std::string codon = sequenceSummary.indexToCodon(static_cast<unsigned>(Parameter::randUnif(61.0, 64.0))); //randomly choose a stop codon, from range 61-63
		tmpSeq += codon;

		Gene simulatedGene(tmpSeq, tmpDesc, gene.getId());
		genome.addGene(simulatedGene, true);
	}
}