Beispiel #1
0
static void singleBootstrap(tree *tr, int i, analdef *adef, rawdata *rdta, cruncheddata *cdta)
{
  tr->treeID = i;
  tr->checkPointCounter = 0;
     
  computeNextReplicate(tr, &adef->boot, (int*)NULL, (int*)NULL, FALSE, FALSE);
  
  initModel(tr, rdta, cdta, adef);                                   
         
  getStartingTree(tr, adef);

  computeBIGRAPID(tr, adef, TRUE);

  if(adef->bootstrapBranchLengths)
    {
      switch(tr->rateHetModel)
	{
	case GAMMA:	  
	case GAMMA_I:      
	  modOpt(tr, adef, TRUE, adef->likelihoodEpsilon);		      	    	    
	  break;
	case CAT:    	      	 		  
	  tr->likelihood = unlikely;	       
	  catToGamma(tr, adef);	  
	  initModel(tr, rdta, cdta, adef);
	  if(i == 0)
	    modOpt(tr, adef, TRUE, adef->likelihoodEpsilon);	 
	  else
	    modOpt(tr, adef, TRUE, adef->likelihoodEpsilon);
	  gammaToCat(tr); 		  	      	        	
	  break;
	default:
	  assert(0);
	}
    }  


   printBootstrapResult(tr, adef, TRUE);    
}
Beispiel #2
0
void shSupports(tree *tr, analdef *adef, rawdata *rdta, cruncheddata *cdta)
{
  double 
    diff,
    *lhVectors[3];

  char 
    bestTreeFileName[1024],
    shSupportFileName[1024];
  
  FILE 
    *f;

  int
    interchanges = 0,
    counter = 0;

  assert(adef->restart);
    
  tr->resample = permutationSH(tr, 1000, 12345);
    
  lhVectors[0] = (double *)rax_malloc(sizeof(double) * tr->cdta->endsite);
  lhVectors[1] = (double *)rax_malloc(sizeof(double) * tr->cdta->endsite);
  lhVectors[2] = (double *)rax_malloc(sizeof(double) * tr->cdta->endsite);
  tr->bInf = (branchInfo*)rax_malloc(sizeof(branchInfo) * (tr->mxtips - 3));       

  initModel(tr, rdta, cdta, adef);        
 
 
  getStartingTree(tr, adef);
  
 
  
  if(adef->useBinaryModelFile)
    {
      readBinaryModel(tr);
      evaluateGenericInitrav(tr, tr->start);
      treeEvaluate(tr, 2);
    }
  else
    modOpt(tr, adef, FALSE, 10.0);
  
  printBothOpen("Time after model optimization: %f\n", gettime() - masterTime);
  
  printBothOpen("Initial Likelihood %f\n\n", tr->likelihood);   
    
  do
    {   
      double 
	lh1,
	lh2;
   
      lh1 = tr->likelihood;

      interchanges = encapsulateNNIs(tr, lhVectors, FALSE);       

      evaluateGeneric(tr, tr->start); 		

      lh2 = tr->likelihood;

      diff = ABS(lh1 - lh2);

      printBothOpen("NNI interchanges %d Likelihood %f\n", interchanges, tr->likelihood);
    }
  while(diff > 0.01);

  printBothOpen("\nFinal Likelihood of NNI-optimized tree: %f\n\n", tr->likelihood);

  setupBranchInfo(tr->start->back, tr, &counter);
  assert(counter == tr->mxtips - 3);
 
  interchanges = encapsulateNNIs(tr, lhVectors, TRUE);
              
  strcpy(bestTreeFileName, workdir); 
  strcat(bestTreeFileName, "RAxML_fastTree.");
  strcat(bestTreeFileName,         run_id); 

  Tree2String(tr->tree_string, tr, tr->start->back, FALSE, TRUE, FALSE, FALSE, FALSE, adef, SUMMARIZE_LH, FALSE, FALSE);
    
  f = myfopen(bestTreeFileName, "wb");
  fprintf(f, "%s", tr->tree_string);
  fclose(f);  

  
  strcpy(shSupportFileName, workdir); 
  strcat(shSupportFileName, "RAxML_fastTreeSH_Support.");
  strcat(shSupportFileName,         run_id);
  
  Tree2String(tr->tree_string, tr, tr->start->back, TRUE, TRUE, FALSE, FALSE, FALSE, adef, SUMMARIZE_LH, FALSE, TRUE);
  
  f = myfopen(shSupportFileName, "wb");
  fprintf(f, "%s", tr->tree_string);
  fclose(f);  
      
  printBothOpen("RAxML NNI-optimized tree written to file: %s\n", bestTreeFileName);
  
  printBothOpen("Same tree with SH-like supports written to file: %s\n", shSupportFileName);
  
  printBothOpen("Total execution time: %f\n", gettime() - masterTime);

  exit(0);
}
Beispiel #3
0
void doInference(tree *tr, analdef *adef, rawdata *rdta, cruncheddata *cdta)
{
  int i, n;

#ifdef _WAYNE_MPI
  int 
    j,
    bestProcess;
#endif

  double loopTime;
  topolRELL_LIST *rl = (topolRELL_LIST *)NULL; 
  int 
    best = -1,
    newBest = -1;
  double 
    bestLH = unlikely; 
  FILE *f;
  char bestTreeFileName[1024]; 
  double overallTime;

  n = adef->multipleRuns;
     
#ifdef _WAYNE_MPI
  if(n % processes != 0)
    n = processes * ((n / processes) + 1);
#endif 

  if(!tr->catOnly)
    {
      rl = (topolRELL_LIST *)rax_malloc(sizeof(topolRELL_LIST));
      initTL(rl, tr, n);
    }

#ifdef _WAYNE_MPI
  long parsimonySeed0 = adef->parsimonySeed;
  n = n / processes;
#endif

  if(adef->rellBootstrap)
    { 
#ifdef _WAYNE_MPI          
      tr->resample = permutationSH(tr, NUM_RELL_BOOTSTRAPS, parsimonySeed0 + 10000 * processID); 
#else     
      tr->resample = permutationSH(tr, NUM_RELL_BOOTSTRAPS, adef->parsimonySeed);        
#endif

       tr->rellTrees = (treeList *)rax_malloc(sizeof(treeList));
       initTreeList(tr->rellTrees, tr, NUM_RELL_BOOTSTRAPS);
    }
  else
    {
      tr->resample = (int *)NULL;
      tr->rellTrees =  (treeList *)NULL;
    }

  for(i = 0; i < n; i++)
    { 
#ifdef _WAYNE_MPI 
      if(i == 0)
        { 
          if(parsimonySeed0 != 0) 
            adef->parsimonySeed = parsimonySeed0 + 10000 * processID;
        }
      j = i + n * processID;
      tr->treeID = j;
#else    
      tr->treeID = i;
#endif

      tr->checkPointCounter = 0;
         
      loopTime = gettime();
                                             
      initModel(tr, rdta, cdta, adef); 

      if(i == 0)
	printBaseFrequencies(tr);
     
      getStartingTree(tr, adef); 
                       
      computeBIGRAPID(tr, adef, TRUE);  

#ifdef _WAYNE_MPI
      if(tr->likelihood > bestLH)
	{
	  best = j;
	  bestLH = tr->likelihood;
	}

      if(!tr->catOnly)
	saveTL(rl, tr, j);
#else
      if(tr->likelihood > bestLH)
	{
	  best = i;
	  bestLH = tr->likelihood;
	}

      if(!tr->catOnly)
	saveTL(rl, tr, i);
#endif

      loopTime = gettime() - loopTime; 
      writeInfoFile(adef, tr, loopTime);
     
    }     
 
  assert(best >= 0);

#ifdef _WAYNE_MPI
  MPI_Barrier(MPI_COMM_WORLD);
  n = n * processes;
#endif

  if(tr->catOnly)
    {
      printBothOpenMPI("\n\nNOT conducting any final model optimizations on all %d trees under CAT-based model ....\n", n);
      printBothOpenMPI("\nREMEMBER that CAT-based likelihood scores are meaningless!\n\n", n);        
#ifdef _WAYNE_MPI
      if(processID != 0)
        {
          MPI_Finalize();
          exit(0);
        }
#endif
    }
  else
    {
      printBothOpenMPI("\n\nConducting final model optimizations on all %d trees under GAMMA-based models ....\n\n", n);
 
#ifdef _WAYNE_MPI
      n = n / processes;
#endif

      if(tr->rateHetModel == GAMMA ||  tr->rateHetModel == GAMMA_I)
	{
	  restoreTL(rl, tr, best);
	  evaluateGenericInitrav(tr, tr->start);
	  if(!adef->useBinaryModelFile)
	    modOpt(tr, adef, FALSE, adef->likelihoodEpsilon); 
	  else
	    {
	      readBinaryModel(tr, adef);
	      evaluateGenericInitrav(tr, tr->start);
	      treeEvaluate(tr, 2);
	    }
	  bestLH = tr->likelihood;
	  tr->likelihoods[best] = tr->likelihood;
	  saveTL(rl, tr, best);
	  tr->treeID = best; 
	  printResult(tr, adef, TRUE);
	  newBest = best;      
	  
	  for(i = 0; i < n; i++)
	    {
#ifdef _WAYNE_MPI
	      j = i + n * processID;
	      if(j != best)
		{
		  restoreTL(rl, tr, j);
		  evaluateGenericInitrav(tr, tr->start);
		  treeEvaluate(tr, 1);
		  tr->likelihoods[j] = tr->likelihood;
		  
		  if(tr->likelihood > bestLH)
		    {
		      newBest = j;
		      bestLH = tr->likelihood;		  
		      saveTL(rl, tr, j);
		    }
		  tr->treeID = j;
		  printResult(tr, adef, TRUE);
		}
	      if(n == 1 && processes == 1)
		printBothOpen("Inference[%d] final GAMMA-based Likelihood: %f tree written to file %s\n", i, tr->likelihoods[i], resultFileName);	   
	      else	    
		printBothOpen("Inference[%d] final GAMMA-based Likelihood: %f tree written to file %s.RUN.%d\n", j, tr->likelihoods[j], resultFileName, j);
#else	  
	      if(i != best)
		{
		  restoreTL(rl, tr, i);
		  evaluateGenericInitrav(tr, tr->start);
		  treeEvaluate(tr, 1);
		  tr->likelihoods[i] = tr->likelihood;
		  
		  if(tr->likelihood > bestLH)
		    {
		      newBest = i;
		      bestLH = tr->likelihood;		  
		      saveTL(rl, tr, i);
		    }
		  tr->treeID = i;
		  printResult(tr, adef, TRUE);
		}

	      
	      if(n == 1)
		printBothOpen("Inference[%d] final GAMMA-based Likelihood: %f tree written to file %s\n", i, tr->likelihoods[i], resultFileName);	   
	      else	    
		printBothOpen("Inference[%d] final GAMMA-based Likelihood: %f tree written to file %s.RUN.%d\n", i, tr->likelihoods[i], resultFileName, i);
#endif	    	 
	    }    
	}
      else
	{     
	  catToGamma(tr, adef);
	  
#ifdef _WAYNE_MPI
	  for(i = 0; i < n; i++)
            {
              j = i + n*processID;
	      rl->t[j]->likelihood = unlikely;
            }  
#else
	  for(i = 0; i < n; i++)
	    rl->t[i]->likelihood = unlikely;
#endif
	  
	  initModel(tr, rdta, cdta, adef);
	  
	  restoreTL(rl, tr, best);      
	  
	  resetBranches(tr);
	  evaluateGenericInitrav(tr, tr->start);
	  modOpt(tr, adef, TRUE, adef->likelihoodEpsilon);      
	  tr->likelihoods[best] = tr->likelihood;
	  bestLH = tr->likelihood;     
	  saveTL(rl, tr, best);
	  tr->treeID = best;
	  printResult(tr, adef, TRUE);
	  newBest = best;
	  
	  for(i = 0; i < n; i++)
	    {
#ifdef _WAYNE_MPI
	      j = i + n*processID;
	      if(j != best)
		{
		  restoreTL(rl, tr, j);	    
		  resetBranches(tr);
		  evaluateGenericInitrav(tr, tr->start);
		  treeEvaluate(tr, 2);
		  tr->likelihoods[j] = tr->likelihood;
		  
		  if(tr->likelihood > bestLH)
		    { 
		      newBest = j;
		      bestLH = tr->likelihood;		
		      saveTL(rl, tr, j);	  
		    }
		  tr->treeID = j;
		  printResult(tr, adef, TRUE);
		} 
	      
	      if(n == 1 && processes == 1)	    
		printBothOpen("Inference[%d] final GAMMA-based Likelihood: %f tree written to file %s\n", i, tr->likelihoods[i], resultFileName);
	      else
		printBothOpen("Inference[%d] final GAMMA-based Likelihood: %f tree written to file %s.RUN.%d\n", j, tr->likelihoods[j], resultFileName, j);
#else
	      if(i != best)
		{
		  restoreTL(rl, tr, i);	    
		  resetBranches(tr);
		  evaluateGenericInitrav(tr, tr->start);
		  treeEvaluate(tr, 2);
		  tr->likelihoods[i] = tr->likelihood;
		  
		  if(tr->likelihood > bestLH)
		    { 
		      newBest = i;
		      bestLH = tr->likelihood;		
		      saveTL(rl, tr, i);	  
		    }
		  tr->treeID = i;
		  printResult(tr, adef, TRUE);
		} 
	      
	      if(n == 1)	    
		printBothOpen("Inference[%d] final GAMMA-based Likelihood: %f tree written to file %s\n", i, tr->likelihoods[i], resultFileName);
	      else
		printBothOpen("Inference[%d] final GAMMA-based Likelihood: %f tree written to file %s.RUN.%d\n", i, tr->likelihoods[i], resultFileName, i);	   	  
#endif
	    }
	}     
    
      assert(newBest >= 0);

#ifdef _WAYNE_MPI
      if(processes > 1)
	{
	  double 
	    *buffer = (double *)rax_malloc(sizeof(double) * processes);
	  for(i = 0; i < processes; i++)
	    buffer[i] = unlikely;
	  buffer[processID] = bestLH;
	  for(i = 0; i < processes; i++)
	    MPI_Bcast(&buffer[i], 1, MPI_DOUBLE, i, MPI_COMM_WORLD);
	  bestLH = buffer[0];
	  bestProcess = 0;
	  for(i = 1; i < processes; i++)
	    if(buffer[i] > bestLH)
	      {
		bestLH = buffer[i];
		bestProcess = i;
	      }
	  
	  rax_free(buffer);	  	  
	}

      if(processID == bestProcess)
	{
#endif

	  restoreTL(rl, tr, newBest);
	  evaluateGenericInitrav(tr, tr->start);
     
	  printBothOpen("\n\nStarting final GAMMA-based thorough Optimization on tree %d likelihood %f .... \n\n", newBest, tr->likelihoods[newBest]);
	  
	  Thorough = 1;
	  tr->doCutoff = FALSE; 
	  treeOptimizeThorough(tr, 1, 10); 
	  evaluateGenericInitrav(tr, tr->start);
	  
	  printBothOpen("Final GAMMA-based Score of best tree %f\n\n", tr->likelihood); 
	  
	  
	  strcpy(bestTreeFileName, workdir); 
	  strcat(bestTreeFileName, "RAxML_bestTree.");
	  strcat(bestTreeFileName,         run_id);
	  
	  
	  Tree2String(tr->tree_string, tr, tr->start->back, TRUE, TRUE, FALSE, FALSE, TRUE, adef, SUMMARIZE_LH, FALSE, FALSE, FALSE, FALSE);
	  
	  f = myfopen(bestTreeFileName, "wb");
	  fprintf(f, "%s", tr->tree_string);
	  fclose(f);
	  
	  if(adef->perGeneBranchLengths)
	    printTreePerGene(tr, adef, bestTreeFileName, "w");      
#ifdef _WAYNE_MPI
	}
#endif
    }

  if(adef->rellBootstrap)
    {
      //WARNING the functions below need to be invoked after all other trees have been printed
      //don't move this part of the code further up!

      int
	i;
#ifdef _WAYNE_MPI
      FILE 
	*f = myfopen(rellBootstrapFileNamePID, "wb");      
#else
      FILE 
	*f = myfopen(rellBootstrapFileName, "wb");
#endif
      
      for(i = 0; i < NUM_RELL_BOOTSTRAPS; i++)
	{
	   restoreTreeList(tr->rellTrees, tr, i);
	   Tree2String(tr->tree_string, tr, tr->start->back, FALSE, TRUE, FALSE, FALSE, TRUE, adef, SUMMARIZE_LH, FALSE, FALSE, FALSE, FALSE);
	   fprintf(f, "%s", tr->tree_string);
	}

      freeTreeList(tr->rellTrees);
      rax_free(tr->rellTrees);
      rax_free(tr->resample);

      fclose(f);

#ifdef _WAYNE_MPI      
      MPI_Barrier(MPI_COMM_WORLD);      
      
      concatenateBSFiles(processes, rellBootstrapFileName);
      removeBSFiles(processes, rellBootstrapFileName);  
      
      MPI_Barrier(MPI_COMM_WORLD); 
      
      if(processID == 0)
	printBothOpen("\nRELL bootstraps written to file %s\n", rellBootstrapFileName);
#else
      printBothOpen("\nRELL bootstraps written to file %s\n", rellBootstrapFileName);    
#endif
    }
  
#ifdef _WAYNE_MPI 
  if(processID == bestProcess)
    {
#endif
      overallTime = gettime() - masterTime;
      
      printBothOpen("Program execution info written to %s\n", infoFileName);
      
      if(!tr->catOnly)
	{
	  printBothOpen("Best-scoring ML tree written to: %s\n\n", bestTreeFileName);
	  
	  if(adef->perGeneBranchLengths && tr->NumberOfModels > 1)    
	    printBothOpen("Per-Partition branch lengths of best-scoring ML tree written to %s.PARTITION.0 to  %s.PARTITION.%d\n\n", bestTreeFileName,  bestTreeFileName, 
			  tr->NumberOfModels - 1);  
	}
      
      printBothOpen("Overall execution time: %f secs or %f hours or %f days\n\n", overallTime, overallTime/3600.0, overallTime/86400.0);    
#ifdef _WAYNE_MPI 
    }
#endif

  if(!tr->catOnly)
    {
      freeTL(rl);   
      rax_free(rl); 
    }
  
#ifdef _WAYNE_MPI
  MPI_Finalize();
#endif
  exit(0);
}
Beispiel #4
0
void fastSearch(tree *tr, analdef *adef, rawdata *rdta, cruncheddata *cdta)
{
  double    
    likelihood, 
    startLikelihood,
    *lhVectors[3];

  char 
    bestTreeFileName[1024];
  
  FILE 
    *f;

  int
    model;

 
  
  lhVectors[0] = (double *)NULL;
  lhVectors[1] = (double *)NULL;
  lhVectors[2] = (double *)NULL;  
      
  /* initialize model parameters with standard starting values */

  initModel(tr, rdta, cdta, adef);      

  printBothOpen("Time after init : %f\n", gettime() - masterTime);

  /* 
     compute starting tree, either by reading in a tree specified via -t 
     or by building one 
  */

  getStartingTree(tr, adef);
 
  printBothOpen("Time after init and starting tree: %f\n", gettime() - masterTime);
  
  /* 
     rough model parameter optimization, the log likelihood epsilon should 
     actually be determined based on the initial tree score and not be hard-coded 
  */
  
 if(adef->useBinaryModelFile)
    {
      readBinaryModel(tr);
      evaluateGenericInitrav(tr, tr->start);
      treeEvaluate(tr, 2);
    }
 else
   modOpt(tr, adef, FALSE, 10.0);
  
  printBothOpen("Time after init, starting tree, mod opt: %f\n", gettime() - masterTime);

  /* print out the number of rate categories used for the CAT model, one should 
     use less then the default, e.g., -c 16 works quite well */

  for(model = 0; model < tr->NumberOfModels; model++)
    printBothOpen("Partion %d number of Cats: %d\n", model, tr->partitionData[model].numberOfCategories);

  /* 
     means that we are going to do thorough insertions 
     with real newton-raphson based br-len opt at the three branches 
     adjactent to every insertion point 
  */

  Thorough = 1;

  
  /*
    loop over SPR cycles until the likelihood difference 
     before and after the SPR cycle is <= 0.5 log likelihood units.
     Rather than being hard-coded this should also be determined based on the 
     actual likelihood of the tree 
  */
 
  do
    {      
      startLikelihood = tr->likelihood;
   
      /* conduct a cycle of linear SPRs */

    

      likelihood = linearSPRs(tr, 20, adef->veryFast);          
           
      evaluateGeneric(tr, tr->start); 
      
      /* the NNIs also optimize br-lens of resulting topology a bit */
      encapsulateNNIs(tr, lhVectors, FALSE);                    
 
      printBothOpen("LH after SPRs %f, after NNI %f\n", likelihood, tr->likelihood);
    }
  while(ABS(tr->likelihood - startLikelihood) > 0.5);

 
  
 

  
  /* print out the resulting tree to the RAxML_bestTree. file. 
     note that boosttrapping or doing multiple inferences won't work.
     This thing computes a single tree and that's it */
      
  strcpy(bestTreeFileName, workdir); 
  strcat(bestTreeFileName, "RAxML_fastTree.");
  strcat(bestTreeFileName,         run_id);

 

  Tree2String(tr->tree_string, tr, tr->start->back, FALSE, TRUE, FALSE, FALSE, FALSE, adef, SUMMARIZE_LH, FALSE, FALSE);
    
  f = myfopen(bestTreeFileName, "wb");
  fprintf(f, "%s", tr->tree_string);
  fclose(f);  

 
    
  printBothOpen("RAxML fast tree written to file: %s\n", bestTreeFileName);
  
  writeBinaryModel(tr);
  
  printBothOpen("Total execution time: %f\n", gettime() - masterTime);

  printBothOpen("Good bye ... \n");
}