void AbstractMixedSubstitutionModel::setVRates(const Vdouble& vd) { if (vd.size()!=modelsContainer_.size()) throw Exception("AbstractMixedSubstitutionModel::setVRates bad size of Vdouble argument."); for (unsigned int i=0;i<vd.size();i++) vRates_[i]=vd[i]; normalizeVRates(); }
// Constructs the distribution from caller-supplied category rates and boundaries.
//   fixedRates  - copied into _rates
//   boundaries  - copied into _bonderi; must hold exactly one more value than fixedRates
//   alpha, beta - copied into _alpha and _beta
// A size mismatch is reported through errorMsg::reportError.
// computeRatesProbs() is called once all members are set.
generalGammaDistributionFixedCategories::generalGammaDistributionFixedCategories(const Vdouble& fixedRates, const Vdouble& boundaries, MDOUBLE alpha, MDOUBLE beta)
: generalGammaDistribution()
{
	const bool sizesAreConsistent = (boundaries.size() == fixedRates.size() + 1);
	if (!sizesAreConsistent) {
		errorMsg::reportError("error in generalGammaDistributionFixedCategories constructor");
	}

	_rates = fixedRates;
	_bonderi = boundaries;
	_alpha = alpha;
	_beta = beta;

	computeRatesProbs();
}
void generalGammaDistributionFixedCategories::setFixedCategories(const Vdouble& fixedBoundaries){ if (fixedBoundaries.size()<2) errorMsg::reportError("Error in generalGammaDistributionFixedCategories::setFixedCategories : at least two boundaries are required"); if (fixedBoundaries[0] > 0.0) errorMsg::reportError("Error in generalGammaDistributionFixedCategories::setFixedCategories : first boundary should be zero"); _bonderi = fixedBoundaries; if (_bonderi[_bonderi.size()] > VERYBIG/10000.0) _bonderi[_bonderi.size()] = VERYBIG/10000.0; // to avoid overflow setFixedCategories(); }
/**
 * Collect the branch lengths of the subtree rooted at node.
 *
 * The first value is node.getDistanceToFather(); it is followed by the
 * values collected recursively for each son, in son order.
 *
 * @param node Root of the subtree to scan.
 * @return One value per node of the subtree.
 */
Vdouble TreeTemplateTools::getBranchLengths(const Node& node) throw (NodePException)
{
  Vdouble lengths(1, node.getDistanceToFather());

  const size_t sonCount = node.getNumberOfSons();
  for (size_t s = 0; s < sonCount; ++s)
  {
    const Vdouble subtreeLengths = getBranchLengths(*node.getSon(s));
    lengths.insert(lengths.end(), subtreeLengths.begin(), subtreeLengths.end());
  }

  return lengths;
}
/////////////////////////////////////////////////////////////////////////////////////////////////////////// //findBestParamManyStarts: Finds the best gammaMixture from many starting points. //The function starts form few starting points. //For each point it tries to optimize the likellihood doing only a small number of iterations. //It then picks the best points (highest likelihood) and continue the maximization for these points only. //The best gammaMixture is stored in _sp and the best likelihood is returned. //input Parameters: //startPointsNum = the number of starting points. //bestStartsNum = the number of best points to continue with the full optimization. //startIter = the number of iterations to perform with all starting points. //maxIterations = the maximum number of iterations to continue with the best points //epsilon = for determining convergence in the maximization process. MDOUBLE optGammaMixtureEM::findBestParamManyStarts(const int startPointsNum, const int bestStartsNum, const int startIter, const int maxIterations, const MDOUBLE epsilon, const MDOUBLE epsilomQopt, ofstream* pOutF) { vector<mixtureDistribution> distVec; Vdouble likelihoodVec(startPointsNum); mixtureDistribution * pMixture = static_cast<mixtureDistribution*>(_pSp->distr()); //create starting distributions int i; for (i = 0; i < startPointsNum; ++i) { //the first distribution will be the current one if (i == 0) distVec.push_back(*pMixture); else distVec.push_back(mixtureDistribution(pMixture->getComponentsNum(), pMixture->categoriesForOneComponent(), LAGUERRE, 15, 15)); } //make a small number of iterations for all random starts for (i = 0; i < distVec.size(); ++i) { likelihoodVec[i] = optimizeParam(&distVec[i], startIter, epsilon, epsilomQopt, pOutF); } //sort results and make full optimization only on the best starts Vdouble sortedL = likelihoodVec; sort(sortedL.begin(),sortedL.end()); MDOUBLE threshold = sortedL[sortedL.size()- bestStartsNum]; MDOUBLE bestL = sortedL[0]; int 
bestDistNum = 0; for (i = 0; i < distVec.size(); ++i) { if (likelihoodVec[i] >= threshold) { MDOUBLE newL = optimizeParam(&distVec[i], maxIterations, epsilon, epsilomQopt, pOutF); if (newL > bestL) { bestL = newL; bestDistNum = i; } } } _pSp->setDistribution(&distVec[bestDistNum]); distVec.clear(); return bestL; }
//Input: alf = the alpha parameter of the Laguerre polynomials // pointsNum = the polynom order //Output: the abscissas and weights are stored in the vecotrs x and w, respectively. //Discreption: given alf, the alpha parameter of the Laguerre polynomials, the function returns the abscissas and weights // of the n-point Guass-Laguerre quadrature formula. // The smallest abscissa is stored in x[0], the largest in x[pointsNum - 1]. void GLaguer::gaulag(Vdouble &x, Vdouble &w, const MDOUBLE alf, const int pointsNum) { x.resize(pointsNum, 0.0); w.resize(pointsNum, 0.0); const int MAXIT=10000; const MDOUBLE EPS=1.0e-6; int i,its,j; MDOUBLE ai,p1,p2,p3,pp,z=0.0,z1; int n= x.size(); for (i=0;i<n;i++) { //loops over the desired roots if (i == 0) { //initial guess for the smallest root z=(1.0+alf)*(3.0+0.92*alf)/(1.0+2.4*n+1.8*alf); } else if (i == 1) {//initial guess for the second smallest root z += (15.0+6.25*alf)/(1.0+0.9*alf+2.5*n); } else { //initial guess for the other roots ai=i-1; z += ((1.0+2.55*ai)/(1.9*ai)+1.26*ai*alf/ (1.0+3.5*ai))*(z-x[i-2])/(1.0+0.3*alf); } for (its=0;its<MAXIT;its++) { //refinement by Newton's method p1=1.0; p2=0.0; for (j=0;j<n;j++) { //Loop up the recurrence relation to get the Laguerre polynomial evaluated at z. p3=p2; p2=p1; p1=((2*j+1+alf-z)*p2-(j+alf)*p3)/(j+1); } //p1 is now the desired Laguerre polynomial. We next compute pp, its derivative, //by a standard relation involving also p2, the polynomial of one lower order. pp=(n*p1-(n+alf)*p2)/z; z1=z; z=z1-p1/pp; //Newton's formula if (fabs(z-z1) <= EPS) break; } if (its >= MAXIT) errorMsg::reportError("too many iterations in gaulag"); x[i]=z; w[i] = -exp(gammln(alf+n)-gammln(MDOUBLE(n)))/(pp*n*p2); } }
/**
 * Collect the branch lengths of the subtree rooted at the node with id nodeId.
 *
 * The first value is the distance to the father of nodeId; it is followed
 * by the values collected recursively for each son, in the order returned
 * by tree.getSonsId().
 *
 * @param tree   The tree to scan.
 * @param nodeId Id of the root of the subtree.
 * @return One value per node of the subtree.
 * @throws NodeNotFoundException if the tree has no node with this id.
 * @throws NodeException if a visited node has no distance to its father.
 */
Vdouble TreeTools::getBranchLengths(const Tree& tree, int nodeId) throw (NodeNotFoundException, NodeException)
{
  if (!tree.hasNode(nodeId))
  {
    throw NodeNotFoundException("TreeTools::getBranchLengths", nodeId);
  }
  if (!tree.hasDistanceToFather(nodeId))
  {
    throw NodeException("TreeTools::getbranchLengths(). No branch length.", nodeId);
  }

  Vdouble lengths(1, tree.getDistanceToFather(nodeId));

  const vector<int> sonIds = tree.getSonsId(nodeId);
  for (size_t s = 0; s < sonIds.size(); ++s)
  {
    const Vdouble subtreeLengths = getBranchLengths(tree, sonIds[s]);
    lengths.insert(lengths.end(), subtreeLengths.begin(), subtreeLengths.end());
  }

  return lengths;
}
// a file with color-coding from Ka/Ks values to color-bins void kaks2Color(const Vdouble & kaksVec, const Vdouble &lowerBoundV, const sequence & refSeq, string fileName,codon *co) { vector<int> colors; int numOfSitesinAln = kaksVec.size(); Vdouble negativesKaksVec,negativesSite; negativesKaksVec.clear(); negativesSite.clear(); int i,gapsInRefSeq=0; for (i=0;i<numOfSitesinAln;i++){ if (codonUtility::aaOf(refSeq[i],*co) == -1) gapsInRefSeq++; } // first dealing with positive selection colors.resize(numOfSitesinAln-gapsInRefSeq); int gap=0; for (i=0;i<numOfSitesinAln;i++){ if (codonUtility::aaOf(refSeq[i],*co) == -1){ gap++; continue; } if (lowerBoundV[i]>1) // color 1 (positive selection) : if confidence interval lower bound > 1 colors[i-gap]=1; else if (kaksVec[i]>1) // color 2(positive selection) : "non-significant" colors[i-gap]=2; else { negativesKaksVec.push_back(kaksVec[i]); //add the value of kaks < 1 negativesSite.push_back(i-gap); //add the number of site of the kaks } } // now dealing with purifying selection Vdouble orderVec = negativesKaksVec; if (orderVec.size()>0) // this is since once the whole protein was positive selection... (anomaly) sort(orderVec.begin(), orderVec.end()); //sort the kaks values to be divided to 5 groups MDOUBLE percentileNum = 5.0; int percentileNumInt = 5; Vdouble maxScoreForPercentile(percentileNumInt); if (orderVec.size()>0) { maxScoreForPercentile[0] = orderVec[0]; for (int c = 1; c < percentileNumInt; ++c){ int place = (int)((c / percentileNum) * negativesKaksVec.size()); MDOUBLE maxScore = orderVec[place]; maxScoreForPercentile[c] = maxScore; } } //loop over all the Ka/Ks < 1 for (int j=0; j < negativesKaksVec.size(); ++j){ MDOUBLE r = negativesKaksVec[j]; //the kaks of the site. int s = (int)negativesSite[j]; //the site. 
if (r > maxScoreForPercentile[4]) colors[s] = 3; else if (r > maxScoreForPercentile[3]) colors[s] = 4; else if (r> maxScoreForPercentile[2]) colors[s] = 5; else if (r > maxScoreForPercentile[1]) colors[s] = 6; else if (r >= maxScoreForPercentile[0]) colors[s] = 7; } //print to file ofstream out(fileName.c_str()); gap=0; amino aminoAcid; LOG(5,<<"Printing selection color bins to file"<<endl); for (i=0;i<refSeq.seqLen();i++){ int aa = codonUtility::aaOf(refSeq[i], *co); if (aa==-1){ gap++; continue; } string aaStr = aminoAcid.fromInt(aa); out<<i+1-gap <<"\t"<<aaStr<<"\t"<<colors[i-gap]; out<<endl; } out.close(); }