Exemple #1
0
void			_LikelihoodFunction::ComputeSiteLikelihoodsForABlock	(long index, _Parameter* results, _SimpleList& scalers, long branchIndex, _SimpleList* branchValues, char mpiRunMode)
// Computes the per-site likelihoods of partition `index` into `results` and
// leaves the matching per-site scaling corrections in `scalers`.
// The caller must supply at least blockLength slots in `results`.
// `branchIndex` and `branchValues` are passed through untouched to the worker routines.
{
	if (!blockDependancies.lData[index])
	{
		// No dependency flag for this block (presumably: no category variables
		// attached -- confirm): evaluate it once with category index -1 and copy
		// the site corrections recorded for this partition.
		ComputeBlock (index, results, -1, branchIndex, branchValues);
		scalers.Clear ();
		scalers.Duplicate (siteCorrections(index));
		return;
	}

	// Dependency flag set: combine the rate classes, iterating over MPI when the
	// caller asked for the REL MPI mode and using the weighted sum otherwise.
	char conditionalRunMode = _hyphyLFConditionProbsWeightedSum;
	if (mpiRunMode == _hyphyLFMPIModeREL)
		conditionalRunMode = _hyphyLFConditionMPIIterate;

	PopulateConditionalProbabilities (index, conditionalRunMode, results, scalers, branchIndex, branchValues);
}
Observations
MultivariateModel
::SimulateData(io::DataSettings &DS) 
{
  /// Simulates observations (patients and their measurements y_ij at time points t_ij)
  /// according to the model, with a noise level e_ij, such that y_ij = f(t_ij) + e_ij.
  /// There are two possible use cases:
  /// 1) The model is not initialized yet (neither its random variables nor the number of
  ///    realizations) and has to be initialized here. This corresponds to simulated data used
  ///    to test whether the model can recover the random variables that generated the data.
  /// 2) The model is already initialized and should rely on its current state (random
  ///    variables, realizations, ...). This corresponds to new data, e.g. dataset augmentation.
  
  // TODO :
  /// PITFALL  : For now, only the first use case is implemented
  /// PITFALL2 : Take a closer look at / merge with InitializeFakeRandomVariables 
  
  
  /// Model dimensions are read from the data settings
  m_ManifoldDimension = DS.GetCognitiveScoresDimension();
  m_NumberOfSubjects  = DS.GetNumberOfSimulatedSubjects();
  
  /// Population variables : a single realization each
  m_RealizationsPerRandomVariable["G"] = 1;
  
  for(size_t DeltaIndex = 1; DeltaIndex < m_ManifoldDimension; ++DeltaIndex)
  {
    const std::string DeltaName = "Delta#" + std::to_string(DeltaIndex);
    m_RealizationsPerRandomVariable[DeltaName] = 1;
  }

  const auto NumberOfBetas = m_NbIndependentSources*(m_ManifoldDimension - 1);
  for(size_t BetaIndex = 0; BetaIndex < NumberOfBetas; ++BetaIndex)
  {
    const std::string BetaName = "Beta#" + std::to_string(BetaIndex);
    m_RealizationsPerRandomVariable[BetaName] = 1;
  }

  /// Individual variables : one realization per simulated subject
  m_RealizationsPerRandomVariable["Ksi"] = m_NumberOfSubjects;
  m_RealizationsPerRandomVariable["Tau"] = m_NumberOfSubjects;

  for(int SourceIndex = 0; SourceIndex < m_NbIndependentSources; ++SourceIndex)
  {
    const std::string SourceName = "S#" + std::to_string(SourceIndex);
    m_RealizationsPerRandomVariable[SourceName] = m_NumberOfSubjects;
  }

  auto SimulatedRealizations = SimulateRealizations();
  
  /// Refresh every model quantity derived from the freshly drawn realizations
  m_G = exp(SimulatedRealizations.at("G", 0));
  ComputeDeltas(SimulatedRealizations);
  ComputeOrthonormalBasis();
  ComputeAMatrix(SimulatedRealizations);
  ComputeSpaceShifts(SimulatedRealizations);
  ComputeBlock(SimulatedRealizations);
  
  /// Random sources : number of visits per subject, visit times and measurement noise
  std::random_device SeedSource;
  std::mt19937 Generator(SeedSource());
  std::uniform_int_distribution<int> VisitCount(DS.GetMinimumNumberOfObservations(), DS.GetMaximumNumberOfObservations());
  UniformRandomVariable TimePointDistribution(60, 95);
  GaussianRandomVariable MeasurementNoise(0, m_Noise->GetVariance());
  
  /// Generate the observations subject by subject
  Observations SimulatedObservations;
  for(int Subject = 0; Subject < m_NumberOfSubjects; ++Subject)
  { 
    /// Draw a random number of time points, sort them and record them in the model
    /// (they are appended before the landmarks of this subject are computed)
    VectorType TimePoints = TimePointDistribution.Samples(VisitCount(Generator));
    TimePoints.sort();
    m_SubjectTimePoints.push_back(TimePoints);
    
    /// One noisy landmark vector per time point
    IndividualObservations SubjectObservations(TimePoints);   
    std::vector<VectorType> Landmarks;
    for(size_t Visit = 0; Visit < TimePoints.size(); ++Visit)
      Landmarks.push_back(ComputeParallelCurve(Subject, Visit) + MeasurementNoise.Samples(m_ManifoldDimension));
    
    SubjectObservations.AddLandmarks(Landmarks);
    SimulatedObservations.AddIndividualData(SubjectObservations);
  }
  
  /// Finalize the observations and mirror their summary attributes in the model
  SimulatedObservations.InitializeGlobalAttributes();
  m_IndividualObservationDate = SimulatedObservations.GetObservations();
  m_SumObservations           = SimulatedObservations.GetTotalSumOfLandmarks();
  m_NbTotalOfObservations     = SimulatedObservations.GetTotalNumberOfObservations();
  
  return SimulatedObservations;
  
}
void
MultivariateModel
::UpdateModel(const Realizations &R, int Type,
            const std::vector<std::string, std::allocator<std::string>> Names)
{
  /// Updates the cached model quantities that depend on the realizations listed in Names
  /// (the variables that have potentially changed).
  ///
  /// R     : current realizations of the random variables
  /// Type  : forwarded to ComputeSubjectTimePoint when only individual variables changed;
  ///         a value > -1 enables that individual-only update
  /// Names : names of the changed variables, optionally suffixed with "#<index>"
  ///         (only the part before '#' is inspected). Recognized names are
  ///         "None", "Ksi", "Tau", "G", "Delta", "Beta", "S" and "All".
  
  
  /// Quantities to recompute, decided from the names below
  bool ComputeG = false;
  bool ComputeDelta = false;
  bool ComputeBasis = false;
  bool ComputeA = false;
  bool ComputeSpaceShift = false;
  bool ComputeBlock_ = false;
  bool IndividualOnly = (Type > -1);
  
  for(const auto& FullName : Names)
  {
    /// Strip the "#<index>" suffix; without '#', find_first_of returns npos and the whole name is kept
    const std::string Name = FullName.substr(0, FullName.find_first_of("#"));
    if(Name == "None")
    {
      continue;
    }
    else if(Name == "Ksi" || Name == "Tau") 
    {
      /// Nothing to schedule here : these names leave IndividualOnly untouched
      continue;
    }
    else if("G" == Name)
    {
      IndividualOnly = false;
      /// m_G must be refreshed from R before the quantities scheduled below are rebuilt,
      /// otherwise they are recomputed from the stale value
      ComputeG = true;
      ComputeBasis = true;
      ComputeA = true;
      ComputeSpaceShift = true;
      ComputeBlock_ = true;
    }
    else if("Delta" == Name)
    {
      IndividualOnly = false;
      /// Same reasoning as for "G" : refresh the deltas before their dependents
      ComputeDelta = true;
      ComputeBasis = true;
      ComputeA = true;
      ComputeSpaceShift = true;
      ComputeBlock_ = true;
    }
    else if("Beta" == Name) 
    {
      IndividualOnly = false;
      ComputeA = true;
      ComputeSpaceShift = true;
    }
    else if("S" == Name)
    {
      ComputeSpaceShift = true;
    }
    else if("All" == Name)
    {
      ComputeSubjectTimePoint(R, -1);
      IndividualOnly = false;
      ComputeG = true;
      ComputeDelta = true;
      ComputeBasis = true;
      ComputeA = true;
      ComputeSpaceShift = true;
      ComputeBlock_ = true;
    } 
    else
    {
      std::cerr << "The realization does not exist in the multivariate model > update model" << std::endl;
    }
  }
  
  
  // TODO : To parse it even faster, update just the coordinates within the names
  if(IndividualOnly) ComputeSubjectTimePoint(R, Type);
  
  /// Order matters : m_G and the deltas are refreshed first, then the basis, the A matrix,
  /// the space shifts and finally the block
  if(ComputeG)          m_G = exp(R.at("G", 0));
  if(ComputeDelta)      ComputeDeltas(R);
  if(ComputeBasis)      ComputeOrthonormalBasis();
  if(ComputeA)          ComputeAMatrix(R);
  if(ComputeSpaceShift) ComputeSpaceShifts(R);
  if(ComputeBlock_)     ComputeBlock(R);
}
Exemple #4
0
void			_LikelihoodFunction::PopulateConditionalProbabilities	(long index, char runMode, _Parameter* buffer, _SimpleList& scalers, long branchIndex, _SimpleList* branchValues)
// this function computes site probabilities for each rate class (or something else that involves iterating over rate classes)
// see run options below
//
// index        : partition (block) index; selects the category traversal template, the block length and the site corrections
// runMode      : one of the modes listed below
// buffer       : caller-allocated output / work area; required size depends on runMode (see below)
// scalers      : output; re-populated with zeros at the start and then filled according to runMode
// branchIndex, branchValues : forwarded unchanged to ComputeBlock

// run mode can be one of the following

// _hyphyLFConditionProbsRawMatrixMode : simply populate an M (number of rate classes) x S (number of site patterns) matrix of conditional likelihoods
//   : expected minimum dimension of buffer is M*S
//	 : scalers will have M*S entries laid out as S for rate class 0, S for rate class 1, .... S for rate class M-1

// _hyphyLFConditionProbsScaledMatrixMode : simply populate an M (number of rate classes) x S (number of site patterns) matrix and scale to the lowest multiplier
//   : expected minimum dimension of buffer is M*S
//	 : scalers will have S entries

// _hyphyLFConditionProbsWeightedSum : compute a sum for each site, weighted by the probability of a given category
//   : expected minimum dimension of buffer is 2*S
//	 : scalers will have S entries
//   : **note that the behavior is different if there are HMM (or constant on partition) variables 
//	 : the size of the buffer is S*(N+1), where N is the cumulative number of categories in such variables for this partition
//   : the size of the scaler is also S*N
//	 : the code will behave as _hyphyLFConditionProbsScaledMatrixMode with all other category variables 
//   : summed CONDITIONED on the values of HMM/Constant on partition 

// _hyphyLFConditionProbsMaxProbClass : compute the category index of maximum probability  
//   : expected minimum dimension of buffer is 3*S -- the result goes into offset 0
//	 : scalers will have S entries 

// _hyphyLFConditionProbsClassWeights : compute the weight of each rate class index 
//   : expected minimum dimension of buffer is M
//	 : scalers will have no entries

// _hyphyLFConditionMPIIterate : compute conditional likelihoods of the partition using MPI
//	 : run mode effectively the same as _hyphyLFConditionProbsWeightedSum
{
	// the traversal template for this partition is a list of four items:
	// (0) the category variables, (1) their category counts, (2) their offsets, (3) the HMM / constant-on-partition information
	_List				*traversalPattern		= (_List*)categoryTraversalTemplate(index),
						*variables			    = (_List*)((*traversalPattern)(0)),
						*catWeigths				= nil;

	_SimpleList			*categoryCounts			= (_SimpleList*)((*traversalPattern)(1)),
						*categoryOffsets		= (_SimpleList*)((*traversalPattern)(2)),
						*hmmAndCOP				= (_SimpleList*)((*traversalPattern)(3)),
						categoryValues			(categoryCounts->lLength,0,0);
	
	// totalSteps is the number of rate combinations visited by the main loop below
	// catCount is the index of the LAST category variable (number of variables minus one), not a count
	long				totalSteps				= categoryOffsets->lData[0] * categoryCounts->lData[0],
						catCount				= variables->lLength-1,
						blockLength				= BlockLength(index),
						hmmCatSize				= hmmAndCOP->Element(-1),
						hmmCatCount				= hmmAndCOP->lLength?(totalSteps/hmmCatSize):0,
						currentHMMCat			= 1,
						arrayDim				;
	
	bool				isTrivial				= variables->lLength == 0,
						switchingHMM			= false;
	
	_CategoryVariable   *catVariable;
	
	
	// arrayDim is the number of entries scalers is populated with for this run mode
	// NOTE(review): the header comment promises M*S scalers in raw matrix mode, but this uses
	// catCount (last variable index) rather than totalSteps -- confirm this is intended
	switch (runMode)
	{
		case _hyphyLFConditionProbsRawMatrixMode:
			arrayDim = catCount*blockLength;
			break;
		case _hyphyLFConditionProbsClassWeights:
			arrayDim = 0;
			break;
		default:
			arrayDim = hmmCatCount?blockLength*hmmCatSize:blockLength;
	}
	
	// the modes that use category weights allocate the weight list; the summing modes also zero the accumulator part of the buffer
	if (runMode == _hyphyLFConditionProbsWeightedSum || runMode == _hyphyLFConditionMPIIterate || runMode == _hyphyLFConditionProbsClassWeights)
	{
		if (runMode == _hyphyLFConditionProbsWeightedSum || runMode == _hyphyLFConditionMPIIterate)
		{
			long upperBound = hmmCatCount?hmmAndCOP->Element(-1)*blockLength:blockLength;
			for (long r = 0; r < upperBound; r++)
				buffer[r] = 0.;
		}
		catWeigths = new _List;
	}
	else
		// max-probability mode: clear the result slots [0,S) and the running maxima [2S,3S)
		if (runMode == _hyphyLFConditionProbsMaxProbClass)
			for (long r = 0, r2 = 2*blockLength; r < blockLength; r++, r2++)
			{
				buffer[r] = 0.0; buffer[r2] = 0.0;
			}
	
	// refresh every category variable, reset it to its first interval and (for the weight-based modes) collect its weights
	for					(long currentCat		= 0; currentCat <= catCount; currentCat++)
	{
		(catVariable = ((_CategoryVariable**)(variables->lData))[currentCat])->Refresh();
		catVariable->SetIntervalValue(0,true);
		if (runMode == _hyphyLFConditionProbsWeightedSum || runMode == _hyphyLFConditionMPIIterate || runMode == _hyphyLFConditionProbsClassWeights)
			(*catWeigths) << catVariable->GetWeights();
	}
	
		
	scalers.Populate	(arrayDim,0,0);
	
#ifdef __HYPHYMPI__
	// in MPI mode the weight of every rate combination is stored during pass 0 so that it can be
	// looked up again when the result for that combination is received
	_GrowingVector * computedWeights = nil;
	if (runMode == _hyphyLFConditionMPIIterate)
	{
		computedWeights = new _GrowingVector;
	}
	long				mpiTasksSent = 0;
#endif	
	
	// a single pass for all modes except _hyphyLFConditionMPIIterate, which runs two:
	// pass 0 sets the categories and sends the work off, pass 1 receives and accumulates the results
	for					(long pass = 0; pass < 1+(runMode == _hyphyLFConditionMPIIterate); pass++)
	{
		for					(long currentRateCombo  = 0; currentRateCombo < totalSteps; currentRateCombo++)
		{

			// setting each category variable to its appropriate value 
			
			_Parameter		 currentRateWeight = 1.;
			if (pass == 0)
			{
				if (!isTrivial)
				{
					// categoryValues is advanced like an odometer: the last variable changes fastest,
					// and when it wraps around the preceding variables are incremented with carry
					long remainder = currentRateCombo % categoryCounts->lData[catCount];
					
					if (hmmCatCount)
					{
						currentHMMCat = currentRateCombo / hmmCatCount;
						switchingHMM = (currentRateCombo % hmmCatCount) == 0;
					}
					
					if (currentRateCombo && remainder  == 0)
					{
						categoryValues.lData[catCount] = 0;
						(((_CategoryVariable**)(variables->lData))[catCount])->SetIntervalValue(0);
						for (long uptick = catCount-1; uptick >= 0; uptick --)
						{
							categoryValues.lData[uptick]++;
							if (categoryValues.lData[uptick] == categoryCounts->lData[uptick])
							{
								categoryValues.lData[uptick] = 0;
								(((_CategoryVariable**)(variables->lData))[uptick])->SetIntervalValue(0);
							}
							else
							{
								(((_CategoryVariable**)(variables->lData))[uptick])->SetIntervalValue(categoryValues.lData[uptick]);
								break;
							}
						}
					}
					else
					{
						if (currentRateCombo)
						{
							categoryValues.lData[catCount]++;
							(((_CategoryVariable**)(variables->lData))[catCount])->SetIntervalValue(remainder);
						}
					}
				}
			
				if (runMode == _hyphyLFConditionProbsWeightedSum || runMode == _hyphyLFConditionProbsClassWeights || runMode == _hyphyLFConditionMPIIterate)
				{
					// weight of this combination = product of the weights of the currently selected categories
					// NOTE(review): the loop starts at hmmCatCount, which is used here as a variable index -- confirm
					for					(long currentCat		= hmmCatCount; currentCat <= catCount; currentCat++)
						currentRateWeight *= ((_Matrix**)catWeigths->lData)[currentCat]->theData[categoryValues.lData[currentCat]];
					
	#ifdef __HYPHYMPI__
					if (runMode == _hyphyLFConditionMPIIterate && pass == 0)
						computedWeights->Store(currentRateWeight);
	#endif				
					if (runMode == _hyphyLFConditionProbsClassWeights)
					{
						// class-weights mode only reports the weight; no likelihood is computed
						buffer [currentRateCombo] = currentRateWeight;
						continue;
					}
					else
						if (currentRateWeight == 0.0) // nothing to do, eh?
							continue;
	#ifdef __HYPHYMPI__
						else
						{
							if (runMode == _hyphyLFConditionMPIIterate)
							{
								// pass 0 of the MPI mode only dispatches the work; results are collected in pass 1
								SendOffToMPI (currentRateCombo);
								mpiTasksSent ++;
								continue;
							}
						}
	#endif						
				}
			}
			
			long useThisPartitonIndex = currentRateCombo;
			
#ifdef __HYPHYMPI__
			if (runMode == _hyphyLFConditionMPIIterate)
			{
				// results may arrive in any order; the rate combination is recovered from the rank of the sender (rank - 1)
				MPI_Status	   status;
				ReportMPIError(MPI_Recv (resTransferMatrix.theData, resTransferMatrix.GetSize(), MPI_DOUBLE, MPI_ANY_SOURCE , HYPHY_MPI_DATA_TAG, MPI_COMM_WORLD,&status),true);				
				useThisPartitonIndex = status.MPI_SOURCE-1;
				currentRateWeight    = computedWeights->theData[useThisPartitonIndex];
			}
#endif						
			
			// now that the categories are set we can proceed with the computing step
			// siteCorrectors points at the site corrections of this rate combination, or is nil when the partition has none
			long			 indexShifter					= blockLength * useThisPartitonIndex;
			long			 *siteCorrectors				= ((_SimpleList**)siteCorrections.lData)[index]->lLength?
															 (((_SimpleList**)siteCorrections.lData)[index]->lData) + indexShifter
															 :nil;
			

			if (runMode == _hyphyLFConditionProbsRawMatrixMode || runMode == _hyphyLFConditionProbsScaledMatrixMode) 
				// populate the matrix of conditionals and scaling factors
			{
				_Parameter	_hprestrict_ *bufferForThisCategory = buffer + indexShifter;

				ComputeBlock	(index, bufferForThisCategory, useThisPartitonIndex, branchIndex, branchValues);
				// if the call above reported cached results, repeat it with recomputation forced
				if (usedCachedResults)
				{
					bool saveFR = forceRecomputation;
					forceRecomputation = true;
					ComputeBlock	(index, bufferForThisCategory, useThisPartitonIndex, branchIndex, branchValues);
					forceRecomputation = saveFR;
				}
				
				// NOTE(review): siteCorrectors can be nil here (see its definition above) and is
				// dereferenced without a check in the raw matrix branch -- confirm callers guarantee corrections exist
				if (runMode == _hyphyLFConditionProbsRawMatrixMode)
					for (long p = 0; p < blockLength; p++)
						scalers.lData[p+indexShifter] = siteCorrectors[p];
				else
				{
					if (siteCorrectors)
					{
						for (long r1 = 0; r1 < blockLength; r1++)
						{
							long scv			  = *siteCorrectors,
								 scalerDifference = scv-scalers.lData[r1];
							
							if (scalerDifference > 0) 
							// this class has a _bigger_ scaling factor than at least one other class
							// hence it needs to be scaled down (unless it's the first class)
							{
								if (useThisPartitonIndex==0) //(scalers.lData[r1] == -1)
									scalers.lData[r1] = scv;
								else
									bufferForThisCategory[r1] *= acquireScalerMultiplier (scalerDifference);
							}
							else
							{
								if (scalerDifference < 0) 
								// this class is a smaller scaling factor, i.e. its the biggest among all those
								// considered so far; all other classes need to be scaled down
								{							
									_Parameter scaled = acquireScalerMultiplier (-scalerDifference);
									// walk backwards over the same site in every previously filled rate class
									for (long z = indexShifter+r1-blockLength; z >= 0; z-=blockLength)
										buffer[z] *= scaled;
									
									scalers.lData[r1] = scv;
								}
							}
							siteCorrectors++;
						}
					}
				}
			} 
			else
			{
				if (runMode == _hyphyLFConditionProbsWeightedSum || runMode == _hyphyLFConditionProbsMaxProbClass || runMode == _hyphyLFConditionMPIIterate) 
				{
					//if (branchIndex>=0)
					//	((_TheTree*)LocateVar(theTrees.lData[index]))->AddBranchToForcedRecomputeList (branchIndex+((_TheTree*)LocateVar(theTrees.lData[index]))->GetLeafCount());
					
	#ifdef			__HYPHYMPI__
					if (runMode == _hyphyLFConditionMPIIterate)
					{
						// copy the received likelihoods into the scratch area at [S,2S) and the received
						// scalers into the site corrections
						// NOTE(review): siteCorrectors is written without a nil check -- confirm
						long offset = resTransferMatrix.GetVDim();

						for (long k = 0; k < blockLength; k++)
						{
							buffer[blockLength+k] = resTransferMatrix.theData[k];
							siteCorrectors[k]     = resTransferMatrix.theData[k+offset];
						}
					}
					else
						
#endif
					
					// when __HYPHYMPI__ is defined, this call is the body of the `else` above
					// the per-class likelihoods go into the scratch area that follows the accumulator(s)
					ComputeBlock	(index, buffer + (hmmCatCount?hmmCatSize:1)*blockLength, useThisPartitonIndex, branchIndex, branchValues);
					
					if (runMode != _hyphyLFConditionMPIIterate && usedCachedResults)
					{
						bool saveFR = forceRecomputation;
						forceRecomputation = true;
						ComputeBlock	(index, buffer + (hmmCatCount?hmmCatSize:1)*blockLength, useThisPartitonIndex, branchIndex, branchValues);
						forceRecomputation = saveFR;
					}

					
					if (runMode == _hyphyLFConditionProbsWeightedSum || runMode == _hyphyLFConditionMPIIterate)
					{
						// r1 walks the accumulator slice (the slice of the current HMM category when HMM
						// variables are present), r2 walks the scratch area just filled
						long lowerBound  = hmmCatCount?blockLength*currentHMMCat:0,
							 upperBound  = hmmCatCount?blockLength*(1+currentHMMCat):blockLength,
							 lowerBound2 = hmmCatCount?(hmmCatSize*blockLength):blockLength;
									
						
						for (long r1 = lowerBound, r2 = lowerBound2; r1 < upperBound; r1++,r2++)
						{
							if (siteCorrectors)
							{
								long scv = *siteCorrectors;
								
								if (scv < scalers.lData[r1]) // this class has a _smaller_ scaling factor
								{
									// rescale the running sum to the new (smaller) scaler before adding this class
									buffer[r1] = currentRateWeight * buffer[r2] + buffer[r1] * acquireScalerMultiplier (scalers.lData[r1] - scv);
									scalers.lData[r1] = scv;
								}
								else
								{
									if (scv > scalers.lData[r1]) // this is a _larger_ scaling factor
										buffer[r1] += currentRateWeight * buffer[r2] * acquireScalerMultiplier (scv - scalers.lData[r1]);							
									else // same scaling factors
										buffer[r1] += currentRateWeight * buffer[r2];
								}
								
								siteCorrectors++;
							}
							else
								buffer[r1] += currentRateWeight * buffer[r2];
							
						}		
					}
					else // runMode = _hyphyLFConditionProbsMaxProbClass
					{
						// r3 indexes the winning class per site [0,S), r2 the likelihoods of this class [S,2S),
						// r1 the best likelihood seen so far [2S,3S)
						// NOTE(review): acquireScalerMultiplier receives a negative argument in both branches
						// below, unlike the other call sites in this function -- confirm this is intended
						for (long r1 = blockLength*2, r2 = blockLength, r3 = 0; r3 < blockLength; r1++,r2++,r3++)
						{
							bool doChange = false;
							if (siteCorrectors)
							{
								long scv  = *siteCorrectors,
									 diff = scv - scalers.lData[r3];
								
								if (diff<0) // this has a _smaller_ scaling factor
								{
									_Parameter scaled = buffer[r1]*acquireScalerMultiplier (diff);
									if (buffer[r2] > scaled)
										doChange = true;
									else
										buffer[r1] = scaled;
									scalers.lData[r3] = scv;
								}
								else
								{
									if (diff>0) // this is a _larger_ scaling factor
										buffer[r2] *= acquireScalerMultiplier (-diff);		
									doChange = buffer[r2] > buffer[r1] && ! CheckEqual (buffer[r2],buffer[r1]);
								}
								
								siteCorrectors++;
							}
							else
								doChange = buffer[r2] > buffer[r1] && ! CheckEqual (buffer[r2],buffer[r1]);
							
							if (doChange)
							{
								buffer[r1]		   = buffer[r2];
								buffer[r3]         = useThisPartitonIndex;
							}
						}						
					}
				}
			}
#ifdef __HYPHYMPI__
			// stop once every dispatched task has been collected; this point is only reached by
			// iterations that did not `continue` above. In the non-MPI run modes mpiTasksSent starts
			// at 0, so the pre-decrement makes it negative and the break never triggers
			if (--mpiTasksSent == 0)
				break;
#endif	
		}
	}
#ifdef __HYPHYMPI__
	DeleteObject (computedWeights);
#endif	
	DeleteObject (catWeigths);
}