void Individual::RefineStartingConditions(bool optModel, FLOAT_TYPE branchPrec){ bool optOmega, optAlpha, optFlex, optPinv, optFreqs, optRelRates, optSubsetRates; optOmega = optAlpha = optFlex = optPinv = optFreqs = optRelRates = optSubsetRates = false; if(optModel){ for(int modnum = 0;modnum < modPart.NumModels();modnum++){ Model *mod = modPart.GetModel(modnum); const ModelSpecification *modSpec = mod->GetCorrespondingSpec(); if(modSpec->numRateCats > 1 && modSpec->IsNonsynonymousRateHet() == false && modSpec->IsFlexRateHet() == false) optAlpha = true; if(modSpec->IsFlexRateHet()) optFlex = true; if(modSpec->includeInvariantSites && modSpec->fixInvariantSites == false) optPinv = true; if(modSpec->IsCodon()) optOmega = true; #ifdef MORE_DETERM_OPT if(modSpec->IsCodon() == false && modSpec->fixStateFreqs == false && modSpec->IsEqualStateFrequencies() == false && modSpec->IsEmpiricalStateFrequencies() == false) optFreqs = true; if(modSpec->fixRelativeRates == false && (modSpec->Nst() > 1)) optRelRates = true; #endif } if(modSpecSet.InferSubsetRates() && modSpecSet.NumSpecs() > 1) optSubsetRates = true; } outman.UserMessageNoCR("optimizing: starting branch lengths"); if(optAlpha) outman.UserMessageNoCR(", alpha shape"); if(optPinv) outman.UserMessageNoCR(", prop. invar"); #ifdef MORE_DETERM_OPT if(optRelRates) outman.UserMessageNoCR(", rel rates"); if(optFreqs) outman.UserMessageNoCR(", eq freqs"); #endif if(optOmega) outman.UserMessageNoCR(", dN/dS (aka omega) parameters"); if(optSubsetRates) outman.UserMessageNoCR(", subset rates"); outman.UserMessage("..."); FLOAT_TYPE improve=(FLOAT_TYPE)999.9; CalcFitness(0); for(int i=1;improve > branchPrec;i++){ FLOAT_TYPE alphaOptImprove=0.0, pinvOptImprove = 0.0, omegaOptImprove = 0.0, flexOptImprove = 0.0, optImprove=0.0, scaleOptImprove=0.0, subsetRateImprove=0.0, rateOptImprove=0.0; FLOAT_TYPE freqOptImprove=0.0; CalcFitness(0); FLOAT_TYPE passStart=Fitness(); optImprove=treeStruct->OptimizeAllBranches(branchPrec); CalcFitness(0); FLOAT_TYPE trueImprove= Fitness() - passStart; assert(trueImprove >= -1.0); if(trueImprove < ZERO_POINT_ZERO) trueImprove = ZERO_POINT_ZERO; scaleOptImprove=treeStruct->OptimizeTreeScale(branchPrec); //if some of the branch lengths are at the minimum or maximum boundaries the scale optimization //can actually worsen the score. This isn't particularly important during initial refinement, //so just hide it to keep the user from thinking that there is something terribly wrong if(scaleOptImprove < ZERO_POINT_ZERO) scaleOptImprove = ZERO_POINT_ZERO; CalcFitness(0); if(optModel){ for(int modnum = 0;modnum < modPart.NumModels();modnum++){ Model *mod = modPart.GetModel(modnum); const ModelSpecification *modSpec = mod->GetCorrespondingSpec(); if(modSpec->IsCodon())//optimize omega even if there is only 1 omegaOptImprove += treeStruct->OptimizeOmegaParameters(branchPrec, modnum); else if(mod->NRateCats() > 1){ if(modSpec->IsFlexRateHet()){//Flex rates //no longer doing alpha first, it was too hard to know if the flex rates had been partially optimized //already during making of a stepwise tree //if(i == 1) rateOptImprove = treeStruct->OptimizeAlpha(branchPrec); //if(i == 1 && modSpec.gotFlexFromFile==false) rateOptImprove = treeStruct->OptimizeBoundedParameter(branchPrec, mod->Alpha(), 0, 1.0e-8, 999.9, &Model::SetAlpha); flexOptImprove += treeStruct->OptimizeFlexRates(branchPrec, modnum); } else if(modSpec->fixAlpha == false){//normal gamma //rateOptImprove = treeStruct->OptimizeAlpha(branchPrec); //do NOT let alpha go too low here - on bad or random starting trees the branch lengths get crazy long //rateOptImprove = treeStruct->OptimizeBoundedParameter(branchPrec, mod->Alpha(), 0, 1.0e-8, 999.9, &Model::SetAlpha); alphaOptImprove += treeStruct->OptimizeBoundedParameter(branchPrec, mod->Alpha(), 0, 0.05, 999.9, modnum, &Model::SetAlpha); } } if(modSpec->includeInvariantSites && !modSpec->fixInvariantSites) pinvOptImprove += treeStruct->OptimizeBoundedParameter(branchPrec, mod->PropInvar(), 0, 1.0e-8, mod->maxPropInvar, modnum, &Model::SetPinv); #ifdef MORE_DETERM_OPT if(modSpec->IsCodon() == false && modSpec->fixStateFreqs == false && modSpec->IsEqualStateFrequencies() == false && modSpec->IsEmpiricalStateFrequencies() == false) freqOptImprove += treeStruct->OptimizeEquilibriumFreqs(branchPrec, modnum); if(modSpec->fixRelativeRates == false && (modSpec->Nst() > 1)) rateOptImprove += treeStruct->OptimizeRelativeNucRates(branchPrec, modnum); #endif } if(optSubsetRates){ subsetRateImprove += treeStruct->OptimizeSubsetRates(branchPrec); } } improve=scaleOptImprove + trueImprove + alphaOptImprove + pinvOptImprove + flexOptImprove + omegaOptImprove + subsetRateImprove; outman.precision(8); outman.UserMessageNoCR("pass%2d:+%9.3f (branch=%7.2f scale=%6.2f", i, improve, trueImprove, scaleOptImprove); if(optOmega) outman.UserMessageNoCR(" omega=%6.2f", omegaOptImprove); if(optAlpha) outman.UserMessageNoCR(" alpha=%6.2f", alphaOptImprove); #ifdef MORE_DETERM_OPT if(optFreqs) outman.UserMessageNoCR(" freqs=%6.2f", freqOptImprove); if(optRelRates) outman.UserMessageNoCR(" rel rates=%6.2f", rateOptImprove); #endif if(optFlex) outman.UserMessageNoCR(" flex=%6.2f", flexOptImprove); if(optPinv) outman.UserMessageNoCR(" pinv=%6.2f", pinvOptImprove); if(optSubsetRates) outman.UserMessageNoCR(" subset rates=%6.2f", subsetRateImprove); outman.UserMessage(")"); } treeStruct->nodeOptVector.clear(); }
void Individual::MakeStepwiseTree(int nTax, int attachesPerTaxon, FLOAT_TYPE optPrecision ){ treeStruct=new Tree(); treeStruct->modPart = &modPart; treeStruct->AssignCLAsFromMaster(); Individual scratchI; scratchI.treeStruct=new Tree(); Tree *scratchT = scratchI.treeStruct; scratchT->modPart = &scratchI.modPart; scratchT->AssignCLAsFromMaster(); scratchI.CopySecByRearrangingNodesOfFirst(scratchT, this, true); int n = nTax; Set taxset(n); for( int i = 1; i <= n; i++ ) taxset += i; int placeInAllNodes=n+1; // ofstream stepout("stepwise.log"); outman.UserMessage("number of taxa added:"); Bipartition mask;//mask is used for constrained trees for(int i = 0;i<3;i++){//add the first 3 int pos = rnd.random_int( taxset.Size() ); int k = taxset[pos]; if(treeStruct->constraints.empty()) scratchT->AddRandomNode(k, placeInAllNodes ); else scratchT->AddRandomNodeWithConstraints(k, placeInAllNodes, &mask ); taxset -= k; } //use information on the similarity between sequences to choose first stepwise additions /* const SequenceData *dat = treeStruct->data; int nstates = mod->NStates(); FLOAT_TYPE **pdist = New2DArray<FLOAT_TYPE>(dat->NTax(), dat->NTax()); for(int i=0;i<nTax;i++){ pdist[i][i] = 0.0; for(int j=i+1;j<nTax;j++){ pdist[i][j] = CalculateHammingDistance((char*) dat->GetRow(i), (char*) dat->GetRow(j), dat->GetCounts(), dat->NChar(), nstates); pdist[j][i] = pdist[i][j]; } } //add the first 3 //be careful because the taxa are indexed from 1->ntax int pos = rnd.random_int( taxset.Size() ); int first = (taxset[pos]); scratchT->AddRandomNode(first, placeInAllNodes ); taxset -= first; //add the furthest taxon to that int sec = 1; FLOAT_TYPE maxDist = pdist[first-1][sec-1]; for(int i=sec+1;i<=dat->NTax();i++){ if(pdist[first-1][i-1] > maxDist){ sec = i; maxDist = pdist[first-1][sec-1]; } } scratchT->AddRandomNode(sec, placeInAllNodes ); taxset -= sec; //add the furthest taxon to that (which may in fact be close to first, but should not have a pdist = 0 to it) int third = (first == 1 ? 2 : 1); maxDist = pdist[sec-1][third-1]; for(int i=third+1;i<=dat->NTax();i++){ if(pdist[sec-1][i] > maxDist && i != first && pdist[first-1][third-1] > ZERO_POINT_ZERO){ third = i; maxDist = pdist[sec-1][third-1]; } } scratchT->AddRandomNode(third, placeInAllNodes ); taxset -= third; */ CopySecByRearrangingNodesOfFirst(treeStruct, &scratchI, true); for( int i = 3; i < n; i++ ) { //select a random node int pos = rnd.random_int( taxset.Size() ); int k = taxset[pos]; taxset -= k; //add the node randomly - this is a little odd, but for the existing swap collecting machinery //to work right, the taxon to be added needs to already be in the tree if(treeStruct->constraints.empty()) scratchT->AddRandomNode(k, placeInAllNodes ); else scratchT->AddRandomNodeWithConstraints(k, placeInAllNodes, &mask ); TreeNode *added = scratchT->allNodes[k]; scratchT->SweepDirtynessOverTree(added); scratchT->OptimizeBranchesWithinRadius(added->anc, optPrecision, 0, NULL); //backup what we have now CopySecByRearrangingNodesOfFirst(treeStruct, &scratchI, true); FLOAT_TYPE bestScore = scratchT->lnL; //collect reconnection points - this will automatically filter for constraints scratchT->GatherValidReconnectionNodes(scratchT->NTax()*2, added, NULL, &mask); // stepout << i << "\t" << k << "\t" << bestScore << "\t"; //start swappin int num=0; //for(list<ReconNode>::iterator b = scratchT->sprRang.begin();b != scratchT->sprRang.end();b++){ ReconList attempted; while(num < attachesPerTaxon && scratchT->sprRang.size() > 0){ int connectNum = rnd.random_int(scratchT->sprRang.size()); listIt broken = scratchT->sprRang.NthElement(connectNum); //try a reattachment point scratchT->SPRMutate(added->nodeNum, &(*broken), optPrecision, 0); //record the score broken->chooseProb = scratchT->lnL; attempted.AddNode(*broken); scratchT->sprRang.RemoveNthElement(connectNum); // stepout << scratchT->lnL << "\t"; //restore the tree scratchI.CopySecByRearrangingNodesOfFirst(scratchT, this, true); num++; } //now find the best score ReconNode *best = NULL; //For debugging, add to random place, to check correct filtering of attachment points for constraints /* if(attempted.size() != 0) best = attempted.RandomReconNode(); */ for(list<ReconNode>::iterator b = attempted.begin();b != attempted.end();b++){ if((*b).chooseProb > bestScore){ best = &(*b); bestScore = (*b).chooseProb; } } //if we didn't find anything better than the initial random attachment we don't need to do anything if(best != NULL){ scratchT->SPRMutate(added->nodeNum, best, optPrecision, 0); } else scratchT->Score(); scratchI.CalcFitness(0); // stepout << scratchT->lnL << endl; CopySecByRearrangingNodesOfFirst(treeStruct, &scratchI, true); //outman.UserMessage(" %d %f", i+1, scratchT->lnL); outman.UserMessageNoCR(" %d ", i+1); outman.flush(); //when we've added half the taxa optimize alpha, flex or omega if(i == (n/2)){ FLOAT_TYPE improve = 0.0; for(int modnum = 0;modnum < modPart.NumModels();modnum++){ Model *mod = scratchI.modPart.GetModel(modnum); const ModelSpecification *modSpec = mod->GetCorrespondingSpec(); if(modSpec->IsCodon())//optimize omega even if there is only 1 improve += scratchT->OptimizeOmegaParameters(optPrecision, modnum); else if(mod->NRateCats() > 1){ if(modSpec->IsFlexRateHet()){//Flex rates //no longer doing alpha first, it was too hard to know if the flex rates had been partially optimized //already during making of a stepwise tree improve += scratchT->OptimizeFlexRates(optPrecision, modnum); } else if(modSpec->fixAlpha == false){//normal gamma //do NOT let alpha go too low here - on bad or random starting trees the branch lengths get crazy long improve += scratchT->OptimizeBoundedParameter(optPrecision, mod->Alpha(), 0, 0.05, 999.9, modnum, &Model::SetAlpha); } } if(modSpec->includeInvariantSites && !modSpec->fixInvariantSites) improve += scratchT->OptimizeBoundedParameter(optPrecision, mod->PropInvar(), 0, 1.0e-8, mod->maxPropInvar, modnum, &Model::SetPinv); } if(modSpecSet.InferSubsetRates()){ improve += scratchT->OptimizeSubsetRates(optPrecision); } if(!FloatingPointEquals(improve, 0.0, 1e-8)) outman.UserMessage("\n Optimizing parameters... improved %8.3f lnL", improve); // this used to depend on param improvement - not sure why // if(rateOptImprove > 0.0){ scratchT->Score(); FLOAT_TYPE start=scratchT->lnL; scratchT->OptimizeAllBranches(optPrecision); FLOAT_TYPE bimprove = scratchT->lnL - start; outman.UserMessage("\nOptimizing branchlengths... improved %f lnL", bimprove); // } } } // stepout.close(); outman.UserMessage(""); scratchI.treeStruct->RemoveTreeFromAllClas(); delete scratchI.treeStruct; scratchI.treeStruct=NULL; }