Code Example #1
File: individual.cpp  Project: rekepalli/garli
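//Refine an individual's starting state: repeatedly optimize branch lengths and, when
//optModel is true, the parameters of each partition subset's model (alpha, prop. invar,
//omega or flex rates, and optionally freqs/rel rates and subset rates) until the total
//improvement of a pass drops below branchPrec.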
void Individual::RefineStartingConditions(bool optModel, FLOAT_TYPE branchPrec){
	bool optOmega, optAlpha, optFlex, optPinv, optFreqs, optRelRates, optSubsetRates;
	optOmega = optAlpha = optFlex = optPinv = optFreqs = optRelRates = optSubsetRates = false;

	if(optModel){
		for(int modnum = 0;modnum < modPart.NumModels();modnum++){
			Model *mod = modPart.GetModel(modnum);
			const ModelSpecification *modSpec = mod->GetCorrespondingSpec();
			if(modSpec->numRateCats > 1 && modSpec->IsNonsynonymousRateHet() == false && modSpec->IsFlexRateHet() == false) optAlpha = true;
			if(modSpec->IsFlexRateHet()) optFlex = true;
			if(modSpec->includeInvariantSites && modSpec->fixInvariantSites == false) optPinv = true;
			if(modSpec->IsCodon()) optOmega = true;

#ifdef MORE_DETERM_OPT
			if(modSpec->IsCodon() == false && modSpec->fixStateFreqs == false && modSpec->IsEqualStateFrequencies() == false && modSpec->IsEmpiricalStateFrequencies() == false)
				optFreqs = true;
			if(modSpec->fixRelativeRates == false && (modSpec->Nst() > 1))
				optRelRates = true;
#endif

			}
		if(modSpecSet.InferSubsetRates() && modSpecSet.NumSpecs() > 1)
			optSubsetRates = true;
		}

	outman.UserMessageNoCR("optimizing: starting branch lengths");
	if(optAlpha) outman.UserMessageNoCR(", alpha shape");
	if(optPinv) outman.UserMessageNoCR(", prop. invar");
#ifdef MORE_DETERM_OPT
	if(optRelRates) outman.UserMessageNoCR(", rel rates");
	if(optFreqs) outman.UserMessageNoCR(", eq freqs");
#endif
	if(optOmega) outman.UserMessageNoCR(", dN/dS (aka omega) parameters");
	if(optSubsetRates) outman.UserMessageNoCR(", subset rates");
	outman.UserMessage("...");
	FLOAT_TYPE improve=(FLOAT_TYPE)999.9;
	CalcFitness(0);

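	//keep making optimization passes until the total improvement in a pass falls below branchPrec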
	for(int i=1;improve > branchPrec;i++){
		FLOAT_TYPE alphaOptImprove=0.0, pinvOptImprove = 0.0, omegaOptImprove = 0.0, flexOptImprove = 0.0, optImprove=0.0, scaleOptImprove=0.0, subsetRateImprove=0.0, rateOptImprove=0.0;
		FLOAT_TYPE freqOptImprove=0.0;
		
		CalcFitness(0);
		FLOAT_TYPE passStart=Fitness();
		
		optImprove=treeStruct->OptimizeAllBranches(branchPrec);
		CalcFitness(0);

		FLOAT_TYPE trueImprove= Fitness() - passStart;
		assert(trueImprove >= -1.0);
		if(trueImprove < ZERO_POINT_ZERO) trueImprove = ZERO_POINT_ZERO;

		scaleOptImprove=treeStruct->OptimizeTreeScale(branchPrec);
		//if some of the branch lengths are at the minimum or maximum boundaries the scale optimization
		//can actually worsen the score.  This isn't particularly important during initial refinement,
		//so just hide it to keep the user from thinking that there is something terribly wrong
		if(scaleOptImprove < ZERO_POINT_ZERO) scaleOptImprove = ZERO_POINT_ZERO;

		CalcFitness(0);
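		//optimize the parameters of each model (partition subset) in turn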
		if(optModel){
			for(int modnum = 0;modnum < modPart.NumModels();modnum++){
				Model *mod = modPart.GetModel(modnum);
				const ModelSpecification *modSpec = mod->GetCorrespondingSpec();
				if(modSpec->IsCodon())//optimize omega even if there is only 1
					omegaOptImprove += treeStruct->OptimizeOmegaParameters(branchPrec, modnum);
				else if(mod->NRateCats() > 1){
					if(modSpec->IsFlexRateHet()){//Flex rates
						//no longer doing alpha first, it was too hard to know if the flex rates had been partially optimized
						//already during making of a stepwise tree
						//if(i == 1) rateOptImprove = treeStruct->OptimizeAlpha(branchPrec);
						//if(i == 1 && modSpec.gotFlexFromFile==false) rateOptImprove = treeStruct->OptimizeBoundedParameter(branchPrec, mod->Alpha(), 0, 1.0e-8, 999.9, &Model::SetAlpha);
						flexOptImprove += treeStruct->OptimizeFlexRates(branchPrec, modnum);
						}
					else if(modSpec->fixAlpha == false){//normal gamma
						//rateOptImprove = treeStruct->OptimizeAlpha(branchPrec);
						//do NOT let alpha go too low here - on bad or random starting trees the branch lengths get crazy long
						//rateOptImprove = treeStruct->OptimizeBoundedParameter(branchPrec, mod->Alpha(), 0, 1.0e-8, 999.9, &Model::SetAlpha);
						alphaOptImprove += treeStruct->OptimizeBoundedParameter(branchPrec, mod->Alpha(), 0, 0.05, 999.9, modnum, &Model::SetAlpha);
						}
					}
				if(modSpec->includeInvariantSites && !modSpec->fixInvariantSites)
					pinvOptImprove += treeStruct->OptimizeBoundedParameter(branchPrec, mod->PropInvar(), 0, 1.0e-8, mod->maxPropInvar, modnum, &Model::SetPinv);

#ifdef MORE_DETERM_OPT
				if(modSpec->IsCodon() == false && modSpec->fixStateFreqs == false && modSpec->IsEqualStateFrequencies() == false && modSpec->IsEmpiricalStateFrequencies() == false)
					freqOptImprove += treeStruct->OptimizeEquilibriumFreqs(branchPrec, modnum);
				if(modSpec->fixRelativeRates == false && (modSpec->Nst() > 1))
					rateOptImprove += treeStruct->OptimizeRelativeNucRates(branchPrec, modnum);
#endif
				}
			if(optSubsetRates){
				subsetRateImprove += treeStruct->OptimizeSubsetRates(branchPrec);
				}
			}
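		//sum this pass's improvements (the freq and rel rate improvements under MORE_DETERM_OPT are reported below but not added to the total)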
		improve=scaleOptImprove + trueImprove + alphaOptImprove + pinvOptImprove + flexOptImprove + omegaOptImprove + subsetRateImprove;
		outman.precision(8);
		outman.UserMessageNoCR("pass%2d:+%9.3f (branch=%7.2f scale=%6.2f", i, improve, trueImprove, scaleOptImprove);
		if(optOmega) outman.UserMessageNoCR(" omega=%6.2f", omegaOptImprove);
		if(optAlpha) outman.UserMessageNoCR(" alpha=%6.2f", alphaOptImprove);

#ifdef MORE_DETERM_OPT
		if(optFreqs) outman.UserMessageNoCR(" freqs=%6.2f", freqOptImprove);
		if(optRelRates) outman.UserMessageNoCR(" rel rates=%6.2f", rateOptImprove);
#endif

		if(optFlex) outman.UserMessageNoCR(" flex=%6.2f", flexOptImprove);
		if(optPinv) outman.UserMessageNoCR(" pinv=%6.2f", pinvOptImprove);
		if(optSubsetRates) outman.UserMessageNoCR(" subset rates=%6.2f", subsetRateImprove);
		outman.UserMessage(")");
		}

	treeStruct->nodeOptVector.clear();
	}
Code Example #2
File: individual.cpp  Project: rekepalli/garli
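//Build a starting tree by stepwise addition: taxa are added in random order, each one is
//tried at up to attachesPerTaxon reattachment points, and the best-scoring placement is
//kept.  Model parameters get one round of optimization after half of the taxa are in.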
void Individual::MakeStepwiseTree(int nTax, int attachesPerTaxon, FLOAT_TYPE optPrecision ){
	treeStruct=new Tree();
	treeStruct->modPart = &modPart;
	treeStruct->AssignCLAsFromMaster();

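	//work on a scratch copy of this individual; the best arrangement found so far is copied back into treeStruct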
	Individual scratchI;
	scratchI.treeStruct=new Tree();
	Tree *scratchT = scratchI.treeStruct;
	scratchT->modPart = &scratchI.modPart;
	scratchT->AssignCLAsFromMaster();
	scratchI.CopySecByRearrangingNodesOfFirst(scratchT, this, true);

	int n = nTax;
	Set taxset(n);
	for( int i = 1; i <= n; i++ )
		taxset += i;
		
	int placeInAllNodes=n+1;
//	ofstream stepout("stepwise.log");
	outman.UserMessage("number of taxa added:");

	Bipartition mask;//mask is used for constrained trees
	for(int i = 0;i<3;i++){//add the first 3
		int pos = rnd.random_int( taxset.Size() );
		int k = taxset[pos];
		if(treeStruct->constraints.empty())
			scratchT->AddRandomNode(k, placeInAllNodes  );
		else
			scratchT->AddRandomNodeWithConstraints(k, placeInAllNodes, &mask );
		taxset -= k;
		}
	//use information on the similarity between sequences to choose first stepwise additions
/*	
	const SequenceData *dat = treeStruct->data;
	int nstates = mod->NStates();
	FLOAT_TYPE **pdist = New2DArray<FLOAT_TYPE>(dat->NTax(), dat->NTax());
	for(int i=0;i<nTax;i++){
		pdist[i][i] = 0.0;
		for(int j=i+1;j<nTax;j++){
			pdist[i][j] = CalculateHammingDistance((char*) dat->GetRow(i), (char*) dat->GetRow(j), dat->GetCounts(), dat->NChar(), nstates);
			pdist[j][i] = pdist[i][j];
			}
		}
	//add the first 3
	//be careful because the taxa are indexed from 1->ntax
	int pos = rnd.random_int( taxset.Size() );
	int first = (taxset[pos]);
	scratchT->AddRandomNode(first, placeInAllNodes  );
	taxset -= first;
	
	//add the furthest taxon to that
	int sec = 1;
	FLOAT_TYPE maxDist = pdist[first-1][sec-1];
	for(int i=sec+1;i<=dat->NTax();i++){
		if(pdist[first-1][i-1] > maxDist){
			sec = i; 
			maxDist = pdist[first-1][sec-1];
			}
		}
	scratchT->AddRandomNode(sec, placeInAllNodes  );
	taxset -= sec;
	//add the furthest taxon to that (which may in fact be close to first, but should not have a pdist = 0 to it)
	int third = (first == 1 ? 2 : 1);
	maxDist = pdist[sec-1][third-1];
	for(int i=third+1;i<=dat->NTax();i++){
		if(pdist[sec-1][i] > maxDist && i != first && pdist[first-1][third-1] > ZERO_POINT_ZERO){
			third = i; 
			maxDist = pdist[sec-1][third-1];
			}
		}
	scratchT->AddRandomNode(third, placeInAllNodes  );
	taxset -= third;
*/
	CopySecByRearrangingNodesOfFirst(treeStruct, &scratchI, true);

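	//add the remaining taxa one at a time, each at the best of the attachment points tried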
	for( int i = 3; i < n; i++ ) {
		//select a random node
		int pos = rnd.random_int( taxset.Size() );
		int k = taxset[pos];
		taxset -= k;
		//add the node randomly - this is a little odd, but for the existing swap collecting machinery
		//to work right, the taxon to be added needs to already be in the tree
		if(treeStruct->constraints.empty())
			scratchT->AddRandomNode(k, placeInAllNodes  );
		else
			scratchT->AddRandomNodeWithConstraints(k, placeInAllNodes, &mask );
		TreeNode *added = scratchT->allNodes[k];

		scratchT->SweepDirtynessOverTree(added);
		scratchT->OptimizeBranchesWithinRadius(added->anc, optPrecision, 0, NULL);

		//backup what we have now
		CopySecByRearrangingNodesOfFirst(treeStruct, &scratchI, true);
		FLOAT_TYPE bestScore = scratchT->lnL;
		
		//collect reconnection points - this will automatically filter for constraints
		scratchT->GatherValidReconnectionNodes(scratchT->NTax()*2, added, NULL, &mask);
		
//			stepout << i << "\t" << k << "\t" << bestScore << "\t";

		//start swapping
		int num=0;
		//for(list<ReconNode>::iterator b = scratchT->sprRang.begin();b != scratchT->sprRang.end();b++){
		ReconList attempted;
		while(num < attachesPerTaxon && scratchT->sprRang.size() > 0){
			int connectNum = rnd.random_int(scratchT->sprRang.size());
			listIt broken = scratchT->sprRang.NthElement(connectNum);
			//try a reattachment point
			scratchT->SPRMutate(added->nodeNum, &(*broken), optPrecision, 0);
			//record the score
			broken->chooseProb = scratchT->lnL;
			attempted.AddNode(*broken);
			scratchT->sprRang.RemoveNthElement(connectNum);
//			stepout << scratchT->lnL << "\t";
			//restore the tree
			scratchI.CopySecByRearrangingNodesOfFirst(scratchT, this, true);
			num++;
			}
		//now find the best score
		ReconNode *best = NULL;
		
		//For debugging, add to random place, to check correct filtering of attachment points for constraints
/*
		if(attempted.size() != 0)
			best = attempted.RandomReconNode();
*/
		for(list<ReconNode>::iterator b = attempted.begin();b != attempted.end();b++){
			if((*b).chooseProb > bestScore){
				best = &(*b);
				bestScore = (*b).chooseProb;
				}
			}

		//if we didn't find anything better than the initial random attachment we don't need to do anything
		if(best != NULL){
			scratchT->SPRMutate(added->nodeNum, best, optPrecision, 0);
			}
		else scratchT->Score();
		scratchI.CalcFitness(0);

//		stepout << scratchT->lnL << endl;
		CopySecByRearrangingNodesOfFirst(treeStruct, &scratchI, true);

		//outman.UserMessage(" %d %f", i+1, scratchT->lnL);
		outman.UserMessageNoCR(" %d ", i+1);
		outman.flush();
		//when we've added half the taxa, optimize alpha, flex or omega
		if(i == (n/2)){
			FLOAT_TYPE improve = 0.0;
			for(int modnum = 0;modnum < modPart.NumModels();modnum++){
				Model *mod = scratchI.modPart.GetModel(modnum);
				const ModelSpecification *modSpec = mod->GetCorrespondingSpec();
				if(modSpec->IsCodon())//optimize omega even if there is only 1
					improve += scratchT->OptimizeOmegaParameters(optPrecision, modnum);
				else if(mod->NRateCats() > 1){
					if(modSpec->IsFlexRateHet()){//Flex rates
						//no longer doing alpha first, it was too hard to know if the flex rates had been partially optimized
						//already during making of a stepwise tree
						improve += scratchT->OptimizeFlexRates(optPrecision, modnum);
						}
					else if(modSpec->fixAlpha == false){//normal gamma
						//do NOT let alpha go too low here - on bad or random starting trees the branch lengths get crazy long
						improve += scratchT->OptimizeBoundedParameter(optPrecision, mod->Alpha(), 0, 0.05, 999.9, modnum, &Model::SetAlpha);
						}
					}
				if(modSpec->includeInvariantSites && !modSpec->fixInvariantSites)
					improve += scratchT->OptimizeBoundedParameter(optPrecision, mod->PropInvar(), 0, 1.0e-8, mod->maxPropInvar, modnum, &Model::SetPinv);
				}
			if(modSpecSet.InferSubsetRates()){
				improve += scratchT->OptimizeSubsetRates(optPrecision);
				}
			if(!FloatingPointEquals(improve, 0.0, 1e-8)) outman.UserMessage("\n   Optimizing parameters...    improved %8.3f lnL", improve);
		//	this used to depend on param improvement - not sure why
		//	if(rateOptImprove > 0.0){
				scratchT->Score();
				FLOAT_TYPE start=scratchT->lnL;
				scratchT->OptimizeAllBranches(optPrecision);
				FLOAT_TYPE bimprove = scratchT->lnL - start;
				outman.UserMessage("\nOptimizing branchlengths... improved %f lnL", bimprove);
	//			}
			}
		}		

//	stepout.close();
	outman.UserMessage("");
	scratchI.treeStruct->RemoveTreeFromAllClas();
	delete scratchI.treeStruct;
	scratchI.treeStruct=NULL;
	}