void AbstractMixedSubstitutionModel::setVRates(const Vdouble& vd)
{
  if (vd.size()!=modelsContainer_.size())
    throw Exception("AbstractMixedSubstitutionModel::setVRates  bad size of Vdouble argument.");

  for (unsigned int i=0;i<vd.size();i++)
    vRates_[i]=vd[i];
 
  normalizeVRates();
}
// Constructs the distribution from caller-supplied category rates and category boundaries.
// fixedRates  = the rate of each category.
// boundaries  = the category boundaries; must hold exactly one more element than fixedRates,
//               otherwise errorMsg::reportError is called.
// alpha, beta = stored as the distribution's _alpha and _beta.
// After the members are set, computeRatesProbs() is called.
generalGammaDistributionFixedCategories::generalGammaDistributionFixedCategories(const Vdouble& fixedRates, const Vdouble& boundaries, MDOUBLE alpha, MDOUBLE beta) :
generalGammaDistribution()
{
	const bool sizesAgree = (boundaries.size() == (fixedRates.size() + 1));
	if (!sizesAgree)
		errorMsg::reportError("error in generalGammaDistributionFixedCategories constructor");

	_bonderi = boundaries;
	_rates = fixedRates;
	_beta = beta;
	_alpha = alpha;

	computeRatesProbs();
}
void generalGammaDistributionFixedCategories::setFixedCategories(const Vdouble& fixedBoundaries){

	if (fixedBoundaries.size()<2)
		errorMsg::reportError("Error in generalGammaDistributionFixedCategories::setFixedCategories : at least two boundaries are required");
	if (fixedBoundaries[0] > 0.0)
		errorMsg::reportError("Error in generalGammaDistributionFixedCategories::setFixedCategories : first boundary should be zero");
	
	_bonderi = fixedBoundaries;
	if (_bonderi[_bonderi.size()] > VERYBIG/10000.0)
		 _bonderi[_bonderi.size()] = VERYBIG/10000.0; // to avoid overflow 

	setFixedCategories();
}
Esempio n. 4
0
/**
 * @brief Collect the branch lengths of the subtree rooted at a node.
 *
 * The result starts with the node's own distance to its father, followed by
 * the lengths collected recursively from each son, in son order.
 *
 * @param node The root of the subtree to traverse.
 * @return One value per node of the subtree, in the order described above.
 */
Vdouble TreeTemplateTools::getBranchLengths(const Node& node) throw (NodePException)
{
  Vdouble lengths;
  lengths.push_back(node.getDistanceToFather());

  const size_t sonCount = node.getNumberOfSons();
  for (size_t s = 0; s < sonCount; ++s)
  {
    // Append the whole sub-result of this son in one go.
    const Vdouble subtree = getBranchLengths(*node.getSon(s));
    lengths.insert(lengths.end(), subtree.begin(), subtree.end());
  }
  return lengths;
}
Esempio n. 5
0
///////////////////////////////////////////////////////////////////////////////////////////////////////////
//findBestParamManyStarts: Finds the best gammaMixture from many starting points.
//The function starts from several starting points: the current distribution of _pSp, plus
//(startPointsNum - 1) newly constructed mixtureDistribution objects.
//For each point it tries to optimize the likelihood doing only a small number of iterations.
//It then picks the best points (highest likelihood) and continues the maximization for these points only.
//The best gammaMixture is stored in _pSp and its likelihood is returned.
//input Parameters:
//startPointsNum = the number of starting points. Values below 1 are treated as 1.
//bestStartsNum  = the number of best points to continue with the full optimization.
//                 Clamped to the range [1, number of starting points].
//startIter      = the number of iterations to perform with all starting points.
//maxIterations  = the maximum number of iterations to continue with the best points
//epsilon        = for determining convergence in the maximization process. 
//epsilomQopt    = passed unchanged to optimizeParam.
//pOutF          = passed unchanged to optimizeParam.
MDOUBLE optGammaMixtureEM::findBestParamManyStarts(const int startPointsNum, const int bestStartsNum, const int startIter, const int maxIterations, const MDOUBLE epsilon, const MDOUBLE epsilomQopt, ofstream* pOutF)
{
	// Sanitize the counts up front: an unclamped bestStartsNum of 0, or one larger than the
	// number of starts, would index sortedL out of range when computing the threshold below.
	const size_t numStarts = (startPointsNum < 1) ? 1 : static_cast<size_t>(startPointsNum);
	size_t numBest = (bestStartsNum < 1) ? 1 : static_cast<size_t>(bestStartsNum);
	if (numBest > numStarts)
		numBest = numStarts;

	mixtureDistribution * pMixture = static_cast<mixtureDistribution*>(_pSp->distr());

	//create starting distributions: the first one is the current distribution
	vector<mixtureDistribution> distVec;
	distVec.reserve(numStarts);
	distVec.push_back(*pMixture);
	for (size_t s = 1; s < numStarts; ++s)
		distVec.push_back(mixtureDistribution(pMixture->getComponentsNum(), pMixture->categoriesForOneComponent(), LAGUERRE, 15, 15));

	//make a small number of iterations for all starts
	Vdouble likelihoodVec(numStarts);
	for (size_t s = 0; s < numStarts; ++s)
		likelihoodVec[s] = optimizeParam(&distVec[s], startIter, epsilon, epsilomQopt, pOutF);

	//sort results and make full optimization only on the best starts
	Vdouble sortedL = likelihoodVec;
	sort(sortedL.begin(), sortedL.end());
	const MDOUBLE threshold = sortedL[numStarts - numBest];

	// The first fully optimized start becomes the baseline, so the returned likelihood always
	// belongs to the distribution that is installed. If no start qualifies, fall back to
	// distribution 0 and the lowest short-run likelihood.
	bool haveBest = false;
	MDOUBLE bestL = sortedL[0];
	size_t bestDistNum = 0;
	for (size_t s = 0; s < numStarts; ++s)
	{
		if (likelihoodVec[s] >= threshold)
		{
			const MDOUBLE newL = optimizeParam(&distVec[s], maxIterations, epsilon, epsilomQopt, pOutF);
			if (!haveBest || newL > bestL)
			{
				bestL = newL;
				bestDistNum = s;
				haveBest = true;
			}
		}
	}
	_pSp->setDistribution(&distVec[bestDistNum]);
	return bestL;
}
Esempio n. 6
0
//Computes the abscissas and weights of the n-point Gauss-Laguerre quadrature formula.
//Input:  alf       = the alpha parameter of the Laguerre polynomials
//        pointsNum = the polynomial order (the number of quadrature points)
//Output: the abscissas and weights are stored in the vectors x and w, respectively;
//        both vectors are resized to pointsNum.
//        The smallest abscissa is stored in x[0], the largest in x[pointsNum - 1].
//Description: each root is refined with Newton's method from an initial guess;
//        errorMsg::reportError is called when a root does not converge within the iteration limit.
void GLaguer::gaulag(Vdouble &x, Vdouble  &w, const MDOUBLE alf, const int pointsNum)
{
	const MDOUBLE tolerance = 1.0e-6;
	const int maxNewtonSteps = 10000;

	x.resize(pointsNum, 0.0);
	w.resize(pointsNum, 0.0);
	const int n = x.size();

	MDOUBLE root = 0.0; //carried from one root to the next: each initial guess builds on the previous root
	for (int k = 0; k < n; ++k) {
		//initial guess for the k-th root
		switch (k) {
		case 0: //the smallest root
			root = (1.0+alf)*(3.0+0.92*alf)/(1.0+2.4*n+1.8*alf);
			break;
		case 1: //the second smallest root
			root += (15.0+6.25*alf)/(1.0+0.9*alf+2.5*n);
			break;
		default: { //all other roots
			const MDOUBLE ai = k-1;
			root += ((1.0+2.55*ai)/(1.9*ai)+1.26*ai*alf/
				(1.0+3.5*ai))*(root-x[k-2])/(1.0+0.3*alf);
			break;
		}
		}

		//refinement by Newton's method
		MDOUBLE polyN = 0.0;    //Laguerre polynomial of order n evaluated at root
		MDOUBLE polyNm1 = 0.0;  //the polynomial of one lower order
		MDOUBLE deriv = 0.0;    //derivative of polyN at root
		bool converged = false;
		int step = 0;
		while (step < maxNewtonSteps && !converged) {
			polyN = 1.0;
			polyNm1 = 0.0;
			//loop up the recurrence relation to evaluate the polynomial at root
			for (int j = 0; j < n; ++j) {
				const MDOUBLE polyNm2 = polyNm1;
				polyNm1 = polyN;
				polyN = ((2*j+1+alf-root)*polyNm1-(j+alf)*polyNm2)/(j+1);
			}
			//derivative via a standard relation involving the polynomial of one lower order
			deriv = (n*polyN-(n+alf)*polyNm1)/root;
			const MDOUBLE previous = root;
			root = previous-polyN/deriv; //Newton's formula
			converged = (fabs(root-previous) <= tolerance);
			++step;
		}
		if (!converged)
			errorMsg::reportError("too many iterations in gaulag");

		x[k] = root;
		w[k] = -exp(gammln(alf+n)-gammln(MDOUBLE(n)))/(deriv*n*polyNm1);
	}
}
Esempio n. 7
0
/**
 * @brief Collect the branch lengths of the subtree rooted at a given node id.
 *
 * The result starts with the distance from the node to its father, followed by
 * the lengths collected recursively for each son, in the order returned by
 * tree.getSonsId().
 *
 * @param tree   The tree to read from.
 * @param nodeId The id of the subtree root.
 * @return One value per node of the subtree, in the order described above.
 * @throw NodeNotFoundException if the tree has no node with this id.
 * @throw NodeException if the node has no distance to its father.
 */
Vdouble TreeTools::getBranchLengths(const Tree& tree, int nodeId) throw (NodeNotFoundException, NodeException)
{
    if (!tree.hasNode(nodeId))
        throw NodeNotFoundException("TreeTools::getBranchLengths", nodeId);
    if (!tree.hasDistanceToFather(nodeId))
        throw NodeException("TreeTools::getbranchLengths(). No branch length.", nodeId);

    Vdouble lengths(1, tree.getDistanceToFather(nodeId));

    const vector<int> sonIds = tree.getSonsId(nodeId);
    for (vector<int>::const_iterator son = sonIds.begin(); son != sonIds.end(); ++son)
    {
        // Append the whole sub-result of this son in one go.
        const Vdouble subtree = getBranchLengths(tree, *son);
        lengths.insert(lengths.end(), subtree.begin(), subtree.end());
    }
    return lengths;
}
Esempio n. 8
0
// a file with color-coding from Ka/Ks values to color-bins
void kaks2Color(const Vdouble & kaksVec, const Vdouble &lowerBoundV,
				const sequence & refSeq, string fileName,codon *co) {
	vector<int> colors;
	int numOfSitesinAln = kaksVec.size();
	Vdouble negativesKaksVec,negativesSite;
	negativesKaksVec.clear();
	negativesSite.clear();
	int i,gapsInRefSeq=0;

	for (i=0;i<numOfSitesinAln;i++){
		if (codonUtility::aaOf(refSeq[i],*co) == -1) gapsInRefSeq++; 
	}

	// first dealing with positive selection
	colors.resize(numOfSitesinAln-gapsInRefSeq);
	int gap=0;
	for (i=0;i<numOfSitesinAln;i++){
		if (codonUtility::aaOf(refSeq[i],*co) == -1){
			gap++;
			continue;
		}
		if (lowerBoundV[i]>1) // color 1 (positive selection) : if confidence interval lower bound > 1
			colors[i-gap]=1;
		else if (kaksVec[i]>1) // color 2(positive selection) : "non-significant"
			colors[i-gap]=2;
		else  {
			negativesKaksVec.push_back(kaksVec[i]);  //add the value of kaks < 1
			negativesSite.push_back(i-gap);   //add the number of site of the kaks 
		}
	
	}

	// now dealing with purifying selection
	Vdouble orderVec = negativesKaksVec;
	if (orderVec.size()>0) // this is since once the whole protein was positive selection... (anomaly)
		sort(orderVec.begin(), orderVec.end());  //sort the kaks values to be divided to 5 groups
	MDOUBLE percentileNum = 5.0;
	int percentileNumInt = 5;
	Vdouble maxScoreForPercentile(percentileNumInt);
	if (orderVec.size()>0) {
		maxScoreForPercentile[0] = orderVec[0]; 
		for (int c = 1; c < percentileNumInt; ++c){
			int place = (int)((c / percentileNum) * negativesKaksVec.size());
			MDOUBLE maxScore = orderVec[place];
			maxScoreForPercentile[c] = maxScore;
		}
	}

	//loop over all the Ka/Ks < 1  
	for (int j=0; j < negativesKaksVec.size(); ++j){
			MDOUBLE r = negativesKaksVec[j]; //the kaks of the site.
			int s = (int)negativesSite[j];  //the  site.
			if (r > maxScoreForPercentile[4]) 
					colors[s] = 3;
			else if (r > maxScoreForPercentile[3]) 
					colors[s] = 4;
			else if (r> maxScoreForPercentile[2])
					colors[s] = 5;
			else if (r > maxScoreForPercentile[1])
					colors[s] = 6;
			else if (r >= maxScoreForPercentile[0])
					colors[s] = 7;
	}
	//print to file
	ofstream out(fileName.c_str());
	gap=0;
	amino aminoAcid;
	LOG(5,<<"Printing selection color bins to file"<<endl);
	for (i=0;i<refSeq.seqLen();i++){	 
		int aa = codonUtility::aaOf(refSeq[i], *co);
		if (aa==-1){
			gap++;
			continue;
		}
		string aaStr = aminoAcid.fromInt(aa);
		out<<i+1-gap <<"\t"<<aaStr<<"\t"<<colors[i-gap];
		out<<endl;
	}
	out.close();
}