void _LikelihoodFunction::ComputeSiteLikelihoodsForABlock (long index, _Parameter* results, _SimpleList& scalers, long branchIndex, _SimpleList* branchValues, char mpiRunMode) // assumes that results is at least blockLength slots long { if (blockDependancies.lData[index]) PopulateConditionalProbabilities(index, mpiRunMode == _hyphyLFMPIModeREL ?_hyphyLFConditionMPIIterate:_hyphyLFConditionProbsWeightedSum, results, scalers, branchIndex, branchValues); else { ComputeBlock (index, results, -1, branchIndex, branchValues); scalers.Clear (); scalers.Duplicate (siteCorrections(index)); } }
Observations
MultivariateModel
::SimulateData(io::DataSettings &DS)
{
  /// Simulates observations (patients and their measurements y_ij at time points t_ij)
  /// according to the model, with a given noise level e_ij, such that y_ij = f(t_ij) + e_ij.
  /// There are two options:
  /// 1) The model is not initialized (neither the random variables nor the number of
  ///    realizations) and has to be initialized here.
  ///    This corresponds to simulated data used to test the model, i.e. to check whether
  ///    it can recover the random variables used to simulate the data.
  /// 2) The model is already initialized, so the simulation should rely on its current
  ///    state (random variables, realizations, ...).
  ///    This corresponds to new data, for a dataset augmentation for instance.

  // TODO :
  /// PITFALL  : For now, only the first option is implemented
  /// PITFALL2 : Take a closer look at / merge with InitializeFakeRandomVariables


  /// Model dimensions, read from the data settings
  m_ManifoldDimension = DS.GetCognitiveScoresDimension();
  m_NumberOfSubjects  = DS.GetNumberOfSimulatedSubjects();

  /// Population variables get a single realization each
  m_RealizationsPerRandomVariable["G"] = 1;

  for(size_t i = 1; i < m_ManifoldDimension; ++i)
  {
    m_RealizationsPerRandomVariable["Delta#" + std::to_string(i)] = 1;
  }

  const auto NumberOfBetas = m_NbIndependentSources*(m_ManifoldDimension - 1);
  for(size_t i = 0; i < NumberOfBetas; ++i)
  {
    m_RealizationsPerRandomVariable["Beta#" + std::to_string(i)] = 1;
  }

  /// Individual variables get one realization per subject
  m_RealizationsPerRandomVariable["Ksi"] = m_NumberOfSubjects;
  m_RealizationsPerRandomVariable["Tau"] = m_NumberOfSubjects;

  for(int i = 0; i < m_NbIndependentSources; ++i)
  {
    m_RealizationsPerRandomVariable["S#" + std::to_string(i)] = m_NumberOfSubjects;
  }

  auto Simulated = SimulateRealizations();

  /// Bring the model attributes in line with the simulated realizations
  m_G = exp(Simulated.at("G", 0));
  ComputeDeltas(Simulated);
  ComputeOrthonormalBasis();
  ComputeAMatrix(Simulated);
  ComputeSpaceShifts(Simulated);
  ComputeBlock(Simulated);

  /// Random sources: number of visits per subject, visit time points, measurement noise
  std::random_device Seed;
  std::mt19937 Engine(Seed());
  std::uniform_int_distribution<int> VisitCount(DS.GetMinimumNumberOfObservations(), DS.GetMaximumNumberOfObservations());
  UniformRandomVariable TimePointLaw(60, 95);
  GaussianRandomVariable NoiseLaw(0, m_Noise->GetVariance());

  /// Simulate the data, one subject at a time
  Observations SimulatedData;
  for(int Subject = 0; Subject < m_NumberOfSubjects; ++Subject)
  {
    /// Draw a random number of time points and sort them
    const int NumberOfVisits = VisitCount(Engine);
    VectorType TimePoints = TimePointLaw.Samples(NumberOfVisits);
    TimePoints.sort();
    /// NOTE(review): the time points are appended without clearing m_SubjectTimePoints first;
    /// presumably fine because the model is assumed uninitialized here - confirm.
    m_SubjectTimePoints.push_back(TimePoints);

    /// Simulate the measurements at those time points: model value plus noise
    IndividualObservations SubjectData(TimePoints);
    std::vector<VectorType> Measurements;
    for(size_t Visit = 0; Visit < TimePoints.size(); ++Visit)
    {
      Measurements.push_back(ComputeParallelCurve(Subject, Visit) + NoiseLaw.Samples(m_ManifoldDimension));
    }

    SubjectData.AddLandmarks(Measurements);
    SimulatedData.AddIndividualData(SubjectData);
  }

  /// Initialize the observation and model attributes
  SimulatedData.InitializeGlobalAttributes();
  m_IndividualObservationDate = SimulatedData.GetObservations();
  m_SumObservations           = SimulatedData.GetTotalSumOfLandmarks();
  m_NbTotalOfObservations     = SimulatedData.GetTotalNumberOfObservations();

  return SimulatedData;
}
void MultivariateModel ::UpdateModel(const Realizations &R, int Type, const std::vector<std::string, std::allocator<std::string>> Names) { /// Given a list of names (which in fact corresponds to the variables that have potentially changed), /// the function updates the parameters associated to these names /// Possible parameters to update, depending on the name being in "vect<> Names" bool ComputeG = false; bool ComputeDelta = false; bool ComputeBasis = false; bool ComputeA = false; bool ComputeSpaceShift = false; bool ComputeBlock_ = false; bool IndividualOnly = (Type > -1); /// Parameters to update, depending on the names called for(auto it = Names.begin(); it != Names.end(); ++it) { std::string Name = it->substr(0, it->find_first_of("#")); if(Name == "None") { continue; } else if(Name == "Ksi" or Name == "Tau") { continue; } else if("G" == Name) { IndividualOnly = false; ComputeBasis = true; ComputeA = true; ComputeSpaceShift = true; ComputeBlock_ = true; } else if("Delta" == Name) { IndividualOnly = false; ComputeBasis = true; ComputeA = true; ComputeSpaceShift = true; ComputeBlock_ = true; } else if("Beta" == Name) { IndividualOnly = false; ComputeA = true; ComputeSpaceShift = true; } else if("S" == Name) { ComputeSpaceShift = true; } else if("All" == Name) { ComputeSubjectTimePoint(R, -1); IndividualOnly = false; ComputeG = true; ComputeDelta = true; ComputeBasis = true; ComputeA = true; ComputeSpaceShift = true; ComputeBlock_ = true; } else { std::cerr << "The realization does not exist in the multivariate model > update model" << std::endl; } } // TODO : To parse it even faster, update just the coordinates within the names if(IndividualOnly) ComputeSubjectTimePoint(R, Type); if(ComputeG) m_G = exp(R.at("G", 0)); if(ComputeDelta) ComputeDeltas(R); if(ComputeBasis) ComputeOrthonormalBasis(); if(ComputeA) ComputeAMatrix(R); if(ComputeSpaceShift) ComputeSpaceShifts(R); if(ComputeBlock_) ComputeBlock(R); }
void _LikelihoodFunction::PopulateConditionalProbabilities (long index, char runMode, _Parameter* buffer, _SimpleList& scalers, long branchIndex, _SimpleList* branchValues) // this function computes site probabilties for each rate class (or something else that involves iterating over rate classes) // see run options below // run mode can be one of the following // _hyphyLFConditionProbsRawMatrixMode : simply populate an M (number of rate classes) x S (number of site patterns) matrix of conditional likelihoods // : expected minimum dimension of buffer is M*S // : scalers will have M*S entries laid out as S for rate class 0, S for rate class 1, .... S for rate class M-1 // _hyphyLFConditionProbsScaledMatrixMode : simply populate an M (number of rate classes) x S (number of site patterns) and scale to the lowest multiplier // : expected minimum dimension of buffer is M*S // : scalers will have S entries // _hyphyLFConditionProbsWeightedSum : compute a sum for each site using weighted by the probability of a given category // : expected minimum dimension of buffer is 2*S // : scalers will have S entries // : **note that the behavior is different if there are HMM (or constant on partition) variables // : the size of the buffer is S*(N+1), where N is the cumulative number of categories in such variables for this partition // : the size of the scaler is also S*N // : the code will behave as _hyphyLFConditionProbsScaledMatrixMode with all other category variables // : summed CONDITIONED on the values of HMM/Constant on partition // _hyphyLFConditionProbsMaxProbClass : compute the category index of maximum probability // : expected minimum dimension of buffer is 3*S -- the result goes into offset 0 // : scalers will have S entries // _hyphyLFConditionProbsClassWeights : compute the weight of each rate class index // : expected minimum dimension of buffer is M // : scalers will have no entries // _hyphyLFConditionMPIIterate : compute conditional likelihoods of the partition 
using MPI // : run mode effectively the same as _hyphyLFConditionProbsWeightedSum { _List *traversalPattern = (_List*)categoryTraversalTemplate(index), *variables = (_List*)((*traversalPattern)(0)), *catWeigths = nil; _SimpleList *categoryCounts = (_SimpleList*)((*traversalPattern)(1)), *categoryOffsets = (_SimpleList*)((*traversalPattern)(2)), *hmmAndCOP = (_SimpleList*)((*traversalPattern)(3)), categoryValues (categoryCounts->lLength,0,0); long totalSteps = categoryOffsets->lData[0] * categoryCounts->lData[0], catCount = variables->lLength-1, blockLength = BlockLength(index), hmmCatSize = hmmAndCOP->Element(-1), hmmCatCount = hmmAndCOP->lLength?(totalSteps/hmmCatSize):0, currentHMMCat = 1, arrayDim ; bool isTrivial = variables->lLength == 0, switchingHMM = false; _CategoryVariable *catVariable; switch (runMode) { case _hyphyLFConditionProbsRawMatrixMode: arrayDim = catCount*blockLength; break; case _hyphyLFConditionProbsClassWeights: arrayDim = 0; break; default: arrayDim = hmmCatCount?blockLength*hmmCatSize:blockLength; } if (runMode == _hyphyLFConditionProbsWeightedSum || runMode == _hyphyLFConditionMPIIterate || runMode == _hyphyLFConditionProbsClassWeights) { if (runMode == _hyphyLFConditionProbsWeightedSum || runMode == _hyphyLFConditionMPIIterate) { long upperBound = hmmCatCount?hmmAndCOP->Element(-1)*blockLength:blockLength; for (long r = 0; r < upperBound; r++) buffer[r] = 0.; } catWeigths = new _List; } else if (runMode == _hyphyLFConditionProbsMaxProbClass) for (long r = 0, r2 = 2*blockLength; r < blockLength; r++, r2++) { buffer[r] = 0.0; buffer[r2] = 0.0; } for (long currentCat = 0; currentCat <= catCount; currentCat++) { (catVariable = ((_CategoryVariable**)(variables->lData))[currentCat])->Refresh(); catVariable->SetIntervalValue(0,true); if (runMode == _hyphyLFConditionProbsWeightedSum || runMode == _hyphyLFConditionMPIIterate || runMode == _hyphyLFConditionProbsClassWeights) (*catWeigths) << catVariable->GetWeights(); } scalers.Populate 
(arrayDim,0,0); #ifdef __HYPHYMPI__ _GrowingVector * computedWeights = nil; if (runMode == _hyphyLFConditionMPIIterate) { computedWeights = new _GrowingVector; } long mpiTasksSent = 0; #endif for (long pass = 0; pass < 1+(runMode == _hyphyLFConditionMPIIterate); pass++) { for (long currentRateCombo = 0; currentRateCombo < totalSteps; currentRateCombo++) { // setting each category variable to its appropriate value _Parameter currentRateWeight = 1.; if (pass == 0) { if (!isTrivial) { long remainder = currentRateCombo % categoryCounts->lData[catCount]; if (hmmCatCount) { currentHMMCat = currentRateCombo / hmmCatCount; switchingHMM = (currentRateCombo % hmmCatCount) == 0; } if (currentRateCombo && remainder == 0) { categoryValues.lData[catCount] = 0; (((_CategoryVariable**)(variables->lData))[catCount])->SetIntervalValue(0); for (long uptick = catCount-1; uptick >= 0; uptick --) { categoryValues.lData[uptick]++; if (categoryValues.lData[uptick] == categoryCounts->lData[uptick]) { categoryValues.lData[uptick] = 0; (((_CategoryVariable**)(variables->lData))[uptick])->SetIntervalValue(0); } else { (((_CategoryVariable**)(variables->lData))[uptick])->SetIntervalValue(categoryValues.lData[uptick]); break; } } } else { if (currentRateCombo) { categoryValues.lData[catCount]++; (((_CategoryVariable**)(variables->lData))[catCount])->SetIntervalValue(remainder); } } } if (runMode == _hyphyLFConditionProbsWeightedSum || runMode == _hyphyLFConditionProbsClassWeights || runMode == _hyphyLFConditionMPIIterate) { for (long currentCat = hmmCatCount; currentCat <= catCount; currentCat++) currentRateWeight *= ((_Matrix**)catWeigths->lData)[currentCat]->theData[categoryValues.lData[currentCat]]; #ifdef __HYPHYMPI__ if (runMode == _hyphyLFConditionMPIIterate && pass == 0) computedWeights->Store(currentRateWeight); #endif if (runMode == _hyphyLFConditionProbsClassWeights) { buffer [currentRateCombo] = currentRateWeight; continue; } else if (currentRateWeight == 0.0) // nothing to do, eh? 
continue; #ifdef __HYPHYMPI__ else { if (runMode == _hyphyLFConditionMPIIterate) { SendOffToMPI (currentRateCombo); mpiTasksSent ++; continue; } } #endif } } long useThisPartitonIndex = currentRateCombo; #ifdef __HYPHYMPI__ if (runMode == _hyphyLFConditionMPIIterate) { MPI_Status status; ReportMPIError(MPI_Recv (resTransferMatrix.theData, resTransferMatrix.GetSize(), MPI_DOUBLE, MPI_ANY_SOURCE , HYPHY_MPI_DATA_TAG, MPI_COMM_WORLD,&status),true); useThisPartitonIndex = status.MPI_SOURCE-1; currentRateWeight = computedWeights->theData[useThisPartitonIndex]; } #endif // now that the categories are set we can proceed with the computing step long indexShifter = blockLength * useThisPartitonIndex; long *siteCorrectors = ((_SimpleList**)siteCorrections.lData)[index]->lLength? (((_SimpleList**)siteCorrections.lData)[index]->lData) + indexShifter :nil; if (runMode == _hyphyLFConditionProbsRawMatrixMode || runMode == _hyphyLFConditionProbsScaledMatrixMode) // populate the matrix of conditionals and scaling factors { _Parameter _hprestrict_ *bufferForThisCategory = buffer + indexShifter; ComputeBlock (index, bufferForThisCategory, useThisPartitonIndex, branchIndex, branchValues); if (usedCachedResults) { bool saveFR = forceRecomputation; forceRecomputation = true; ComputeBlock (index, bufferForThisCategory, useThisPartitonIndex, branchIndex, branchValues); forceRecomputation = saveFR; } if (runMode == _hyphyLFConditionProbsRawMatrixMode) for (long p = 0; p < blockLength; p++) scalers.lData[p+indexShifter] = siteCorrectors[p]; else { if (siteCorrectors) { for (long r1 = 0; r1 < blockLength; r1++) { long scv = *siteCorrectors, scalerDifference = scv-scalers.lData[r1]; if (scalerDifference > 0) // this class has a _bigger_ scaling factor than at least one other class // hence it needs to be scaled down (unless it's the first class) { if (useThisPartitonIndex==0) //(scalers.lData[r1] == -1) scalers.lData[r1] = scv; else bufferForThisCategory[r1] *= acquireScalerMultiplier 
(scalerDifference); } else { if (scalerDifference < 0) // this class is a smaller scaling factor, i.e. its the biggest among all those // considered so far; all other classes need to be scaled down { _Parameter scaled = acquireScalerMultiplier (-scalerDifference); for (long z = indexShifter+r1-blockLength; z >= 0; z-=blockLength) buffer[z] *= scaled; scalers.lData[r1] = scv; } } siteCorrectors++; } } } } else { if (runMode == _hyphyLFConditionProbsWeightedSum || runMode == _hyphyLFConditionProbsMaxProbClass || runMode == _hyphyLFConditionMPIIterate) { //if (branchIndex>=0) // ((_TheTree*)LocateVar(theTrees.lData[index]))->AddBranchToForcedRecomputeList (branchIndex+((_TheTree*)LocateVar(theTrees.lData[index]))->GetLeafCount()); #ifdef __HYPHYMPI__ if (runMode == _hyphyLFConditionMPIIterate) { long offset = resTransferMatrix.GetVDim(); for (long k = 0; k < blockLength; k++) { buffer[blockLength+k] = resTransferMatrix.theData[k]; siteCorrectors[k] = resTransferMatrix.theData[k+offset]; } } else #endif ComputeBlock (index, buffer + (hmmCatCount?hmmCatSize:1)*blockLength, useThisPartitonIndex, branchIndex, branchValues); if (runMode != _hyphyLFConditionMPIIterate && usedCachedResults) { bool saveFR = forceRecomputation; forceRecomputation = true; ComputeBlock (index, buffer + (hmmCatCount?hmmCatSize:1)*blockLength, useThisPartitonIndex, branchIndex, branchValues); forceRecomputation = saveFR; } if (runMode == _hyphyLFConditionProbsWeightedSum || runMode == _hyphyLFConditionMPIIterate) { long lowerBound = hmmCatCount?blockLength*currentHMMCat:0, upperBound = hmmCatCount?blockLength*(1+currentHMMCat):blockLength, lowerBound2 = hmmCatCount?(hmmCatSize*blockLength):blockLength; for (long r1 = lowerBound, r2 = lowerBound2; r1 < upperBound; r1++,r2++) { if (siteCorrectors) { long scv = *siteCorrectors; if (scv < scalers.lData[r1]) // this class has a _smaller_ scaling factor { buffer[r1] = currentRateWeight * buffer[r2] + buffer[r1] * acquireScalerMultiplier 
(scalers.lData[r1] - scv); scalers.lData[r1] = scv; } else { if (scv > scalers.lData[r1]) // this is a _larger_ scaling factor buffer[r1] += currentRateWeight * buffer[r2] * acquireScalerMultiplier (scv - scalers.lData[r1]); else // same scaling factors buffer[r1] += currentRateWeight * buffer[r2]; } siteCorrectors++; } else buffer[r1] += currentRateWeight * buffer[r2]; } } else // runMode = _hyphyLFConditionProbsMaxProbClass { for (long r1 = blockLength*2, r2 = blockLength, r3 = 0; r3 < blockLength; r1++,r2++,r3++) { bool doChange = false; if (siteCorrectors) { long scv = *siteCorrectors, diff = scv - scalers.lData[r3]; if (diff<0) // this has a _smaller_ scaling factor { _Parameter scaled = buffer[r1]*acquireScalerMultiplier (diff); if (buffer[r2] > scaled) doChange = true; else buffer[r1] = scaled; scalers.lData[r3] = scv; } else { if (diff>0) // this is a _larger_ scaling factor buffer[r2] *= acquireScalerMultiplier (-diff); doChange = buffer[r2] > buffer[r1] && ! CheckEqual (buffer[r2],buffer[r1]); } siteCorrectors++; } else doChange = buffer[r2] > buffer[r1] && ! CheckEqual (buffer[r2],buffer[r1]); if (doChange) { buffer[r1] = buffer[r2]; buffer[r3] = useThisPartitonIndex; } } } } } #ifdef __HYPHYMPI__ if (--mpiTasksSent == 0) break; #endif } } #ifdef __HYPHYMPI__ DeleteObject (computedWeights); #endif DeleteObject (catWeigths); }