Example #1
static void doSPR(tree *tr, state *instate)
{
  nodeptr    
    p = selectRandomSubtree(tr);
  
  /* evaluateGeneric(tr, tr->start, TRUE);
     printf("%f \n", tr->likelihood);*/

  parsimonySPR(p, tr);
  
  /*evaluateGeneric(tr, tr->start, TRUE);
    printf("%f \n", tr->likelihood);*/

  instate->p = p;
  instate->nb  = p->next->back;
  instate->nnb = p->next->next->back;
  
  recordBranchInfo(instate->nb, instate->nbz, instate->tr->numBranches);
  recordBranchInfo(instate->nnb, instate->nnbz, instate->tr->numBranches);

  removeNodeBIG(tr, p,  tr->numBranches);
  instate->q = tr->insertNode;
  instate->r = instate->q->back;
  recordBranchInfo(instate->q, instate->qz, instate->tr->numBranches);

  assert(Thorough == 0);
  
  insertBIG(instate->tr, instate->p, instate->q, instate->tr->numBranches);
  evaluateGeneric(instate->tr, instate->p->next->next, FALSE); 
  /*testInsertBIG(tr, p, tr->insertNode);*/

  printf("%f \n", tr->likelihood);
}
Example #2
//simple sliding window
static void simpleGammaProposal(state * instate)
{
  //TODO: add safety to max and min values
  double newalpha, curv, r,mx,mn;
  curv = instate->tr->partitionData[instate->model].alpha;
  instate->curAlpha = curv;
  r = (double)rand()/(double)RAND_MAX;
  mn = curv-(instate->gm_sliding_window_w/2);
  mx = curv+(instate->gm_sliding_window_w/2);
  newalpha = fabs(mn + r * (mx-mn));
  /* Ensure we always stay within this range */
  if(newalpha > ALPHA_MAX) newalpha = ALPHA_MAX;
  if(newalpha < ALPHA_MIN) newalpha = ALPHA_MIN;
  instate->tr->partitionData[instate->model].alpha = newalpha;

#ifndef _LOCAL_DISCRETIZATION
  pllMakeGammaCats(instate->tr->partitionData[instate->model].alpha, instate->tr->partitionData[instate->model].gammaRates, 4);
#endif

  /* TODO: for the parallel version: need to broadcast the gamma rates before re-evaluating !!!! 
     also note the _LOCAL_DISCRETIZATION flag that should only be used for the parallel stuff !
   */

  evaluateGeneric(instate->tr, instate->tr->start, TRUE);
}
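
The proposal above boils down to a sliding-window draw followed by clamping to [ALPHA_MIN, ALPHA_MAX]. The following standalone sketch (not part of the sources above) isolates just that draw, assuming only the C standard library; WINDOW, LOWER and UPPER are illustrative stand-ins for gm_sliding_window_w, ALPHA_MIN and ALPHA_MAX.

#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#define WINDOW 0.75   /* illustrative stand-in for gm_sliding_window_w */
#define LOWER  0.02   /* illustrative stand-in for ALPHA_MIN           */
#define UPPER  100.0  /* illustrative stand-in for ALPHA_MAX           */

static double slidingWindowDraw(double current)
{
  double r  = (double)rand() / (double)RAND_MAX;  /* uniform in [0,1]             */
  double mn = current - WINDOW / 2.0;
  double mx = current + WINDOW / 2.0;
  double proposal = fabs(mn + r * (mx - mn));     /* reflect negative values at 0 */

  if(proposal > UPPER) proposal = UPPER;          /* clamp to the allowed range   */
  if(proposal < LOWER) proposal = LOWER;
  return proposal;
}

int main(void)
{
  srand(440);
  printf("proposed alpha: %f\n", slidingWindowDraw(1.0));
  return 0;
}
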
static boolean restoreTree (topol *tpl, tree *tr)
{ 
  connptr  r;
  nodeptr  p, p0;    
  int  i;

  for (i = 1; i <= 2*(tr->mxtips) - 2; i++) 
    {  
      /* Uses p = p->next at tip */
      p0 = p = tr->nodep[i];
      do 
	{
	  p->back = (nodeptr) NULL;
	  p = p->next;
	} 
      while (p != p0);
    }

  /*  Copy connections from topology */

  for (r = tpl->links, i = 0; i < tpl->nextlink; r++, i++)     
    hookup(r->p, r->q, r->z, tr->numBranches);      

  tr->likelihood = tpl->likelihood;
  tr->start      = tpl->start;
  tr->ntips      = tpl->ntips;
  
  tr->nextnode   = tpl->nextnode;    

  evaluateGeneric(tr, tr->start, TRUE);
  return TRUE;
}
void treeEvaluateRandom (tree *tr, double smoothFactor)       
{
 
  smoothTreeRandom(tr, (int)((double)smoothings * smoothFactor));
  

  evaluateGeneric(tr, tr->start);       
}
Example #5
/*
 * should be sliding window proposal
 */
static void simpleModelProposal(state * instate)
{
  //TODO: add safety to max and min values
  //record the old ones
  recordSubsRates(instate->tr, instate->model, instate->numSubsRates, instate->curSubsRates);
  //choose a random set of model params,
  //probably with dirichlet proposal
  //with uniform probabilities, no need to have other
  int state;
  double new_value,curv;
  double r,mx,mn;
  //using the branch length sliding window for a test
  for(state = 0; state < instate->numSubsRates; state++)
    {
      curv = instate->tr->partitionData[instate->model].substRates[state];
      r = (double)rand()/(double)RAND_MAX;
      mn = curv-(instate->rt_sliding_window_w/2);
      mx = curv+(instate->rt_sliding_window_w/2);
      new_value = fabs(mn + r * (mx-mn));
      /* Ensure we always stay within this range */
      if(new_value > RATE_MAX) new_value = RATE_MAX;
      if(new_value < RATE_MIN) new_value = RATE_MIN;
      //printf("%i %f %f\n", state, curv, new_value);
      editSubsRates(instate->tr,instate->model, state, new_value);
    }
  //recalculate eigens
#ifndef _LOCAL_DISCRETIZATION
  pllInitReversibleGTR(instate->tr, instate->model); /* 1. recomputes Eigenvectors, Eigenvalues etc. for Q decomp. */
#endif

  /* TODO: need to broadcast rates here for parallel version ! */

  evaluateGeneric(instate->tr, instate->tr->start, TRUE); /* 2. re-traverse the full tree to update all vectors */
  //TODO: without this, the run will fail after a successful model move followed by a failing SPR
  evaluateGeneric(instate->tr, instate->tr->start, FALSE);
  //for prior, just use dirichlet
  // independent gamma distribution for each parameter
  //the pdf for this is
  // for gamma the prior is gamma

  //for statefreqs should all be uniform

  //only calculate the new ones
}
boolean testInsertRestoreBIG (tree *tr, nodeptr p, nodeptr q)
{    
  if(Thorough)
    {
      if (! insertBIG(tr, p, q, tr->numBranches))       return FALSE;    
      
      evaluateGeneric(tr, p->next->next);               
    }
  else
    {
      if (! insertRestoreBIG(tr, p, q))       return FALSE;
      
      {
	nodeptr x, y;
	x = p->next->next;
	y = p->back;
			
	if(! isTip(x->number, tr->rdta->numsp) && isTip(y->number, tr->rdta->numsp))
	  {
	    while ((! x->x)) 
	      {
		if (! (x->x))
		  newviewGeneric(tr, x);		     
	      }
	  }
	
	if(isTip(x->number, tr->rdta->numsp) && !isTip(y->number, tr->rdta->numsp))
	  {
	    while ((! y->x)) 
	      {		  
		if (! (y->x))
		  newviewGeneric(tr, y);
	      }
	  }
	
	if(!isTip(x->number, tr->rdta->numsp) && !isTip(y->number, tr->rdta->numsp))
	  {
	    while ((! x->x) || (! y->x)) 
	      {
		if (! (x->x))
		  newviewGeneric(tr, x);
		if (! (y->x))
		  newviewGeneric(tr, y);
	      }
	  }				      	
	
      }
	
      tr->likelihood = tr->endLH;
    }
     
  return TRUE;
} 
Example #7
static void restoreSubsRates(tree *tr, analdef *adef, int model, int numSubsRates, double *prevSubsRates)
{
  assert(tr->partitionData[model].dataType == DNA_DATA);
  int i;
  for(i=0; i<numSubsRates; i++)
    tr->partitionData[model].substRates[i] = prevSubsRates[i];
#ifndef _LOCAL_DISCRETIZATION
  pllInitReversibleGTR(tr, model);
#endif
  /* TODO need to broadcast rates here for parallel version */

  evaluateGeneric(tr, tr->start, TRUE);
}
Example #8
static void resetSimpleGammaProposal(state * instate)
{
  instate->tr->partitionData[instate->model].alpha = instate->curAlpha;
#ifndef _LOCAL_DISCRETIZATION
  pllMakeGammaCats(instate->tr->partitionData[instate->model].alpha, instate->tr->partitionData[instate->model].gammaRates, 4);
#endif

   /* TODO: for the parallel version: need to broadcast the gamma rates before re-evaluating !!!! 
     also note the _LOCAL_DISCRETIZATION flag that should only be used for the parallel stuff !
   */

  evaluateGeneric(instate->tr, instate->tr->start, TRUE);
}
boolean treeEvaluate (tree *tr, double smoothFactor)       /* Evaluate a user tree */
{
  boolean result;
 
  if(tr->useBrLenScaler)
    assert(0);
 
  result = smoothTree(tr, (int)((double)smoothings * smoothFactor));
  
  assert(result); 

  evaluateGeneric(tr, tr->start);         

  return TRUE;
}
static void quickSmoothLocal(tree *tr, int n)
{
  nodeptr p = tr->insertNode;
  nodeptr q;

  if(n == 0)
    {
      evaluateGeneric(tr, p);
    }
  else
    {
      qsmoothLocal(tr, p->back, n - 1);
      if(!isTip(p->number, tr->rdta->numsp))
	{
	  q = p->next;
	  while(q != p)
	    {
	      qsmoothLocal(tr, q->back, n - 1);
	      q = q->next;
	    }
	}
      evaluateGeneric(tr, p);
    }
}
Example #11
static double testInsertFast (tree *tr, nodeptr p, nodeptr q, insertions *ins, boolean veryFast)
{
  double  qz[NUM_BRANCHES], pz[NUM_BRANCHES];
  nodeptr  r, s;
  double LH;
  int i;
  
  r = q->back; 
  
  for(i = 0; i < tr->numBranches; i++)
    {
      qz[i] = q->z[i];
      pz[i] = p->z[i];
    }
   
  insertFast(tr, p, q, tr->numBranches);
      
  evaluateGeneric(tr, p->next->next);   

  addInsertion(q, tr->likelihood, ins);
  
  
  if(veryFast)
    if(tr->likelihood > tr->endLH)
      {			  
	tr->insertNode = q;
	tr->removeNode = p;   
	for(i = 0; i < tr->numBranches; i++)
	  tr->currentZQR[i] = tr->zqr[i];      
	tr->endLH = tr->likelihood;                            
      }  
 
  LH = tr->likelihood;                  
              
  hookup(q, r, qz, tr->numBranches);
      
  p->next->next->back = p->next->back = (nodeptr) NULL;
  
  if(Thorough)
    {
      s = p->back;
      hookup(p, s, pz, tr->numBranches);          
    }
      
  return LH;
}
Example #12
static double testInsertThorough(tree *tr, nodeptr r, nodeptr q, boolean useVector)
{
  double 
    result,           
    qz[NUM_BRANCHES],
    z[NUM_BRANCHES];
  
  nodeptr  
    x = q->back,
    s = r->back;
  
  int     
    j;   

  for(j = 0; j < tr->numBranches; j++)    
    {
      qz[j] = q->z[j];
      z[j] = sqrt(qz[j]); 

      if(z[j] < zmin) 
	z[j] = zmin;
      
      if(z[j] > zmax)
	z[j] = zmax;
    }      	  	 	    	  
    
  hookup(r->next,       q, z, tr->numBranches);
  hookup(r->next->next, x, z, tr->numBranches);
  hookupDefault(r, s, tr->numBranches);      		     
    
  newviewGeneric(tr, r);	     
    
  localSmooth(tr, r, smoothings);
	  
  if(useVector)
    result = evaluateGenericVector(tr, r);
  else
    result = evaluateGeneric(tr, r);	 	       	  	   

  hookup(q, x, qz, tr->numBranches);
      
  r->next->next->back = r->next->back = (nodeptr) NULL; 

  return result;
}
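
A note on the z[j] = sqrt(qz[j]) assignment above: assuming the usual RAxML branch-length convention z = exp(-b / fracchange), taking the square root of z halves the underlying branch length b, which is the intended effect when branch q--x is split into two halves around the insertion point r. The standalone sketch below (with an illustrative fracchange of 1.0) verifies this numerically.

#include <math.h>
#include <stdio.h>

int main(void)
{
  double fracchange = 1.0;                  /* illustrative; in the library this comes from the model */
  double b     = 0.3;                       /* original branch length                                 */
  double z     = exp(-b / fracchange);      /* stored representation of the branch                    */
  double zHalf = sqrt(z);                   /* what testInsertThorough computes                       */
  double bHalf = -log(zHalf) * fracchange;  /* branch length recovered from sqrt(z)                   */

  printf("b = %f, half length from sqrt(z) = %f\n", b, bHalf);  /* prints 0.300000 and 0.150000 */
  return 0;
}
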
Example #13
static boolean simpleBranchLengthProposal(state * instate)
{
   
  //for each branch get the current branch length
  //and draw a new value from a uniform sliding window:
  //x = current, w = window
  //uniform(x - w/2, x + w/2)

  update_all_branches(instate, FALSE);
  evaluateGeneric(instate->tr, instate->tr->start, FALSE); /* update the tr->likelihood */

  //for the prior, just using an exponential for now,
  //calculated for each branch length,
  //where lambda is chosen and x is the branch length:
  //lambda * exp(-lambda * x)

  //only calculate the new ones
  //
  return TRUE;
}
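
The comments above refer to an exponential prior lambda * exp(-lambda * x) on each branch length. As a minimal standalone sketch (not the library's implementation), the log prior over a set of branch lengths can be accumulated like this; lambda = 0.1 matches bl_prior_exp_lambda used in the mcmc() example further below.

#include <math.h>
#include <stdio.h>

static double logExpPrior(const double *bl, int n, double lambda)
{
  int i;
  double logPrior = 0.0;

  for(i = 0; i < n; i++)
    logPrior += log(lambda) - lambda * bl[i];   /* log(lambda * exp(-lambda * x)) */

  return logPrior;
}

int main(void)
{
  double branches[3] = {0.05, 0.12, 0.3};       /* illustrative branch lengths */

  printf("log prior: %f\n", logExpPrior(branches, 3, 0.1));
  return 0;
}
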
Example #14
static boolean simpleNodeProposal(state * instate)
{
  //prior is flat for these moves
  instate->newprior = 1;
  instate->p = selectRandomInnerSubtree(instate->tr);
  /* records info pre-pruning */
  instate->nb = instate->p->next->back;
  instate->nnb = instate->p->next->next->back;
  //printBothOpen("selected prune node %db%d bl %f \n", instate->p->number, instate->p->back->number, instate->p->z[0]);
  recordBranchInfo(instate->nb, instate->nbz, instate->tr->numBranches);
  recordBranchInfo(instate->nnb, instate->nnbz, instate->tr->numBranches);
  /* prune subtree p */
  if (removeNodeBIG(instate->tr, instate->p,  instate->tr->numBranches) == NULL) assert(FALSE);
  /* insert somewhere else, but it must not be in the pruned subtree */
  //printBothOpen("pruned %db%d \n", instate->p->number, instate->p->back->number);
  instate->q = (nodeptr) NULL;
  naiveInsertionProposal(instate);
  
  if(instate->q!=NULL)
    {
      instate->r = instate->q->back;
      recordBranchInfo(instate->q, instate->qz, instate->tr->numBranches);
      /*
	printBothOpen("inserted %db%d at %db%d where bl %f, Thorough is %d \n", 
	instate->p->number, instate->p->back->number,
	instate->q->number, instate->q->back->number, 
	instate->q->z[0], Thorough);
      */
      if (! insertBIG(instate->tr, instate->p, instate->q, instate->tr->numBranches)) 
	assert(FALSE);
      //TODO: breaks here evaluateGenericSpecial.c:1164: evaluateIterative: Assertion `partitionLikelihood < 0.0' failed.
      evaluateGeneric(instate->tr, instate->p->next->next, FALSE);    
      return TRUE;
    }
  else
    return FALSE;
}
Example #15
static void doNNIs(tree *tr, nodeptr p, double *lhVectors[3], boolean shSupport, int *interchanges, int *innerBranches,
		   double *pqz_0, double *pz1_0, double *pz2_0, double *qz1_0, double *qz2_0, double *pqz_1, double *pz1_1, double *pz2_1,
		   double *qz1_1, double *qz2_1, double *pqz_2, double *pz1_2, double *pz2_2, double *qz1_2, double *qz2_2)
{     
  nodeptr 
    q = p->back,     
    pb1 = p->next->back,
    pb2 = p->next->next->back;

  assert(!isTip(p->number, tr->mxtips));     
  
  if(!isTip(q->number, tr->mxtips))
    {	
      int 
	whichNNI = 0;
      
      nodeptr	 
	qb1 = q->next->back,
	qb2 = q->next->next->back;  
      
      double 		 
	lh[3];         

      *innerBranches = *innerBranches + 1;
          
      nniSmooth(tr, p, 16);	
      
      if(shSupport)
	{	   
	  evaluateGenericVector(tr, p);
	  memcpy(lhVectors[0], tr->perSiteLL, sizeof(double) * tr->cdta->endsite);
	}
      else
	evaluateGeneric(tr, p);
      
      lh[0] = tr->likelihood;
            
      storeBranches(tr, p, pqz_0, pz1_0, pz2_0, qz1_0, qz2_0);
                
      /*******************************************/
      
      hookup(p, q, pqz_0, tr->numBranches); 
      
      hookup(p->next,       qb1, qz1_0, tr->numBranches); 
      hookup(p->next->next, pb2, pz2_0, tr->numBranches); 
      
      hookup(q->next,       pb1, pz1_0, tr->numBranches); 	
      hookup(q->next->next, qb2, qz2_0, tr->numBranches); 
      
      newviewGeneric(tr, p);
      newviewGeneric(tr, p->back);
            
      nniSmooth(tr, p, 16);
      
      if(shSupport)
	{
	  evaluateGenericVector(tr, p);
	  memcpy(lhVectors[1], tr->perSiteLL, sizeof(double) * tr->cdta->endsite);
	}
      else
	evaluateGeneric(tr, p);
      
      lh[1] = tr->likelihood;		
      
      storeBranches(tr, p, pqz_1, pz1_1, pz2_1, qz1_1, qz2_1);
      
      if(lh[1] > lh[0])
	whichNNI = 1;
      
      /*******************************************/
      
      hookup(p, q, pqz_0, tr->numBranches); 
      
      hookup(p->next,       qb1, qz1_0, tr->numBranches); 
      hookup(p->next->next, pb1, pz1_0, tr->numBranches); 
	
      hookup(q->next,       pb2, pz2_0, tr->numBranches); 		
      hookup(q->next->next, qb2, qz2_0, tr->numBranches); 
      
      newviewGeneric(tr, p);
      newviewGeneric(tr, p->back);
           
      nniSmooth(tr, p, 16);
      
      if(shSupport)
	{
	  evaluateGenericVector(tr, p);
	  memcpy(lhVectors[2], tr->perSiteLL, sizeof(double) * tr->cdta->endsite);
	}
      else
	evaluateGeneric(tr, p);
      
      lh[2] = tr->likelihood;            
      
      storeBranches(tr, p, pqz_2, pz1_2, pz2_2, qz1_2, qz2_2);	 
      
      if(lh[2] > lh[0] && lh[2] > lh[1])
	whichNNI = 2;
      
      /*******************************************/
      
      if(shSupport)
	whichNNI = 0;

      switch(whichNNI)
	{
	case 0:	 
	  hookup(p, q, pqz_0, tr->numBranches); 	  
	  
	  hookup(p->next,       pb1, pz1_0, tr->numBranches); 
	  hookup(p->next->next, pb2, pz2_0, tr->numBranches); 
	  
	  hookup(q->next,       qb1, qz1_0, tr->numBranches); 	
	  hookup(q->next->next, qb2, qz2_0, tr->numBranches);
	  break;
	case 1:	 
	  hookup(p, q, pqz_1, tr->numBranches); 	    
	  
	  hookup(p->next,       qb1, pz1_1, tr->numBranches); 
	  hookup(p->next->next, pb2, pz2_1, tr->numBranches); 
	  
	  hookup(q->next,       pb1, qz1_1, tr->numBranches); 	
	  hookup(q->next->next, qb2, qz2_1, tr->numBranches); 
	  break;
	case 2:	  
	  hookup(p, q, pqz_2, tr->numBranches); 
	  
	  hookup(p->next,       qb1, pz1_2, tr->numBranches); 
	  hookup(p->next->next, pb1, pz2_2, tr->numBranches); 
	  
	  hookup(q->next,       pb2, qz1_2, tr->numBranches); 		
	  hookup(q->next->next, qb2, qz2_2, tr->numBranches); 
	  break;
	default:
	  assert(0);
	}       
      
      newviewGeneric(tr, p);
      newviewGeneric(tr, q);     
                 
      if(whichNNI > 0)
	*interchanges = *interchanges + 1;
      
      if(shSupport)	  
	p->bInf->support = SHSupport(tr->cdta->endsite, 1000, tr->resample, lh, lhVectors);	   
    }	  

  
  if(!isTip(pb1->number, tr->mxtips))
    doNNIs(tr, pb1, lhVectors, shSupport, interchanges, innerBranches,
	   pqz_0, pz1_0, pz2_0, qz1_0, qz2_0, pqz_1, pz1_1, pz2_1,
	   qz1_1, qz2_1, pqz_2, pz1_2, pz2_2, qz1_2, qz2_2);

  if(!isTip(pb2->number, tr->mxtips))
    doNNIs(tr, pb2, lhVectors, shSupport, interchanges,  innerBranches,
	   pqz_0, pz1_0, pz2_0, qz1_0, qz2_0, pqz_1, pz1_1, pz2_1,
	   qz1_1, qz2_1, pqz_2, pz1_2, pz2_2, qz1_2, qz2_2);   	         
  

  return;
}
Example #16
void shSupports(tree *tr, analdef *adef, rawdata *rdta, cruncheddata *cdta)
{
  double 
    diff,
    *lhVectors[3];

  char 
    bestTreeFileName[1024],
    shSupportFileName[1024];
  
  FILE 
    *f;

  int
    interchanges = 0,
    counter = 0;

  assert(adef->restart);
    
  tr->resample = permutationSH(tr, 1000, 12345);
    
  lhVectors[0] = (double *)rax_malloc(sizeof(double) * tr->cdta->endsite);
  lhVectors[1] = (double *)rax_malloc(sizeof(double) * tr->cdta->endsite);
  lhVectors[2] = (double *)rax_malloc(sizeof(double) * tr->cdta->endsite);
  tr->bInf = (branchInfo*)rax_malloc(sizeof(branchInfo) * (tr->mxtips - 3));       

  initModel(tr, rdta, cdta, adef);        
 
 
  getStartingTree(tr, adef);
  
 
  
  if(adef->useBinaryModelFile)
    {
      readBinaryModel(tr);
      evaluateGenericInitrav(tr, tr->start);
      treeEvaluate(tr, 2);
    }
  else
    modOpt(tr, adef, FALSE, 10.0);
  
  printBothOpen("Time after model optimization: %f\n", gettime() - masterTime);
  
  printBothOpen("Initial Likelihood %f\n\n", tr->likelihood);   
    
  do
    {   
      double 
	lh1,
	lh2;
   
      lh1 = tr->likelihood;

      interchanges = encapsulateNNIs(tr, lhVectors, FALSE);       

      evaluateGeneric(tr, tr->start); 		

      lh2 = tr->likelihood;

      diff = ABS(lh1 - lh2);

      printBothOpen("NNI interchanges %d Likelihood %f\n", interchanges, tr->likelihood);
    }
  while(diff > 0.01);

  printBothOpen("\nFinal Likelihood of NNI-optimized tree: %f\n\n", tr->likelihood);

  setupBranchInfo(tr->start->back, tr, &counter);
  assert(counter == tr->mxtips - 3);
 
  interchanges = encapsulateNNIs(tr, lhVectors, TRUE);
              
  strcpy(bestTreeFileName, workdir); 
  strcat(bestTreeFileName, "RAxML_fastTree.");
  strcat(bestTreeFileName,         run_id); 

  Tree2String(tr->tree_string, tr, tr->start->back, FALSE, TRUE, FALSE, FALSE, FALSE, adef, SUMMARIZE_LH, FALSE, FALSE);
    
  f = myfopen(bestTreeFileName, "wb");
  fprintf(f, "%s", tr->tree_string);
  fclose(f);  

  
  strcpy(shSupportFileName, workdir); 
  strcat(shSupportFileName, "RAxML_fastTreeSH_Support.");
  strcat(shSupportFileName,         run_id);
  
  Tree2String(tr->tree_string, tr, tr->start->back, TRUE, TRUE, FALSE, FALSE, FALSE, adef, SUMMARIZE_LH, FALSE, TRUE);
  
  f = myfopen(shSupportFileName, "wb");
  fprintf(f, "%s", tr->tree_string);
  fclose(f);  
      
  printBothOpen("RAxML NNI-optimized tree written to file: %s\n", bestTreeFileName);
  
  printBothOpen("Same tree with SH-like supports written to file: %s\n", shSupportFileName);
  
  printBothOpen("Total execution time: %f\n", gettime() - masterTime);

  exit(0);
}
Example #17
void fastSearch(tree *tr, analdef *adef, rawdata *rdta, cruncheddata *cdta)
{
  double    
    likelihood, 
    startLikelihood,
    *lhVectors[3];

  char 
    bestTreeFileName[1024];
  
  FILE 
    *f;

  int
    model;

 
  
  lhVectors[0] = (double *)NULL;
  lhVectors[1] = (double *)NULL;
  lhVectors[2] = (double *)NULL;  
      
  /* initialize model parameters with standard starting values */

  initModel(tr, rdta, cdta, adef);      

  printBothOpen("Time after init : %f\n", gettime() - masterTime);

  /* 
     compute starting tree, either by reading in a tree specified via -t 
     or by building one 
  */

  getStartingTree(tr, adef);
 
  printBothOpen("Time after init and starting tree: %f\n", gettime() - masterTime);
  
  /* 
     rough model parameter optimization, the log likelihood epsilon should 
     actually be determined based on the initial tree score and not be hard-coded 
  */
  
 if(adef->useBinaryModelFile)
    {
      readBinaryModel(tr);
      evaluateGenericInitrav(tr, tr->start);
      treeEvaluate(tr, 2);
    }
 else
   modOpt(tr, adef, FALSE, 10.0);
  
  printBothOpen("Time after init, starting tree, mod opt: %f\n", gettime() - masterTime);

  /* print out the number of rate categories used for the CAT model, one should 
     use fewer than the default, e.g., -c 16 works quite well */

  for(model = 0; model < tr->NumberOfModels; model++)
    printBothOpen("Partion %d number of Cats: %d\n", model, tr->partitionData[model].numberOfCategories);

  /* 
     means that we are going to do thorough insertions 
     with real Newton-Raphson-based br-len opt at the three branches 
     adjacent to every insertion point 
  */

  Thorough = 1;

  
  /*
    loop over SPR cycles until the likelihood difference 
     before and after the SPR cycle is <= 0.5 log likelihood units.
     Rather than being hard-coded this should also be determined based on the 
     actual likelihood of the tree 
  */
 
  do
    {      
      startLikelihood = tr->likelihood;
   
      /* conduct a cycle of linear SPRs */

    

      likelihood = linearSPRs(tr, 20, adef->veryFast);          
           
      evaluateGeneric(tr, tr->start); 
      
      /* the NNIs also optimize br-lens of resulting topology a bit */
      encapsulateNNIs(tr, lhVectors, FALSE);                    
 
      printBothOpen("LH after SPRs %f, after NNI %f\n", likelihood, tr->likelihood);
    }
  while(ABS(tr->likelihood - startLikelihood) > 0.5);

 
  
 

  
  /* print out the resulting tree to the RAxML_bestTree. file. 
     note that bootstrapping or doing multiple inferences won't work.
     This thing computes a single tree and that's it */
      
  strcpy(bestTreeFileName, workdir); 
  strcat(bestTreeFileName, "RAxML_fastTree.");
  strcat(bestTreeFileName,         run_id);

 

  Tree2String(tr->tree_string, tr, tr->start->back, FALSE, TRUE, FALSE, FALSE, FALSE, adef, SUMMARIZE_LH, FALSE, FALSE);
    
  f = myfopen(bestTreeFileName, "wb");
  fprintf(f, "%s", tr->tree_string);
  fclose(f);  

 
    
  printBothOpen("RAxML fast tree written to file: %s\n", bestTreeFileName);
  
  writeBinaryModel(tr);
  
  printBothOpen("Total execution time: %f\n", gettime() - masterTime);

  printBothOpen("Good bye ... \n");
}
void perSiteLogLikelihoods(tree *tr, double *logLikelihoods)
{
  double 
    likelihood,
    accumulatedPerSiteLikelihood = 0.0;

  size_t
    localCount,
    i,
    globalCounter,
    model,
    lower,
    upper;

  /* compute the likelihood of the tree with the standard function to:
     1. obtain the current score for error checking
     2. store a full tree traversal in the traversal descriptor that 
     will then be used for calculating per-site log likelihoods 
     for each site individually and independently */

  evaluateGeneric (tr, tr->start, TRUE);

  likelihood = tr->likelihood;

  /* now compute per-site log likelihoods using the respective functions */

#if (defined( _USE_PTHREADS ) || defined(_FINE_GRAIN_MPI))
  /* here we need a barrier to invoke a parallel region that calls 
     function 
     perSiteLogLikelihoodsPthreads(tree *tr, double *lhs, int n, int tid)
     defined above and subsequently collects the per-site log likelihoods 
     computed by the threads and stored in local per-thread memory 
     and stores them in buffer tr->lhs.
     This corresponds to a gather operation in MPI.
     */

  masterBarrier(THREAD_PER_SITE_LIKELIHOODS, tr);

  /* 
     when the parallel region has terminated, the per-site log likelihoods 
     are stored in array tr->lhs of the master thread which we copy to the result buffer
  */
  
  memcpy(logLikelihoods, tr->lhs, sizeof(double) * tr->originalCrunchedLength);


#else

  /* sequential case: just loop over all partitions and compute per site log likelihoods */

  for(model = 0; model < tr->NumberOfModels; model++)
  {
    lower = tr->partitionData[model].lower;
    upper = tr->partitionData[model].upper;

    for(i = lower, localCount = 0; i < upper; i++, localCount++)
    {
      double 
        l;

      /* 
         we need to switch between rate heterogeneity implementations here.
         when we have PSR we actually need to provide the per-site rate 
         to the function evaluatePartialGeneric() that computes the 
         per-site log likelihood.
         Under GAMMA, the rate will just be ignored, here we just set it to 1.0
         */

      switch(tr->rateHetModel)
      {
        case CAT:
          l = evaluatePartialGeneric (tr, i, tr->partitionData[model].perSiteRates[tr->partitionData[model].rateCategory[localCount]], model);
          break;
        case GAMMA:
          l = evaluatePartialGeneric (tr, i, 1.0, model);
          break;
        default:
          assert(0);
      }

      /* store value in result array and add the likelihood of this site to the overall likelihood */

      logLikelihoods[i] = l;
      accumulatedPerSiteLikelihood += l;
    } 
  }


  /* error checking. We need a dirty ABS() < epsilon check here, because the implementations 
     (standard versus per-site) are pretty different and hence slight numerical 
     deviations are expected */

  assert(ABS(tr->likelihood - accumulatedPerSiteLikelihood) < 0.00001);
  
#endif
  


}
boolean testInsertBIG (tree *tr, nodeptr p, nodeptr q)
{
  double  qz[NUM_BRANCHES], pz[NUM_BRANCHES];
  nodeptr  r;
  boolean doIt = TRUE;
  double startLH = tr->endLH;
  int i;
  
  r = q->back; 
  for(i = 0; i < tr->numBranches; i++)
    {
      qz[i] = q->z[i];
      pz[i] = p->z[i];
    }
  
  if(tr->grouped)
    {
      int rNumber, qNumber, pNumber;
      
      doIt = FALSE;
      
      rNumber = tr->constraintVector[r->number];
      qNumber = tr->constraintVector[q->number];
      pNumber = tr->constraintVector[p->number];
      
      if(pNumber == -9)
	pNumber = checker(tr, p->back);
      if(pNumber == -9)
	doIt = TRUE;
      else
	{
	  if(qNumber == -9)
	    qNumber = checker(tr, q);
	  
	  if(rNumber == -9)
	    rNumber = checker(tr, r);
	  
	  if(pNumber == rNumber || pNumber == qNumber)
	    doIt = TRUE;    	  
	}
    }
  
  if(doIt)
    {     
      if (! insertBIG(tr, p, q, tr->numBranches))       return FALSE;         
      
      evaluateGeneric(tr, p->next->next);   
       
      if(tr->likelihood > tr->bestOfNode)
	{
	  tr->bestOfNode = tr->likelihood;
	  tr->insertNode = q;
	  tr->removeNode = p;   
	  for(i = 0; i < tr->numBranches; i++)
	    {
	      tr->currentZQR[i] = tr->zqr[i];           
	      tr->currentLZR[i] = tr->lzr[i];
	      tr->currentLZQ[i] = tr->lzq[i];
	      tr->currentLZS[i] = tr->lzs[i];      
	    }
	}
      
      if(tr->likelihood > tr->endLH)
	{			  
	  tr->insertNode = q;
	  tr->removeNode = p;   
	  for(i = 0; i < tr->numBranches; i++)
	    tr->currentZQR[i] = tr->zqr[i];      
	  tr->endLH = tr->likelihood;                      
	}        
      
      hookup(q, r, qz, tr->numBranches);
      
      p->next->next->back = p->next->back = (nodeptr) NULL;
      
      if(Thorough)
	{
	  nodeptr s = p->back;
	  hookup(p, s, pz, tr->numBranches);      
	} 
      
      if((tr->doCutoff) && (tr->likelihood < startLH))
	{
	  tr->lhAVG += (startLH - tr->likelihood);
	  tr->lhDEC++;
	  if((startLH - tr->likelihood) >= tr->lhCutoff)
	    return FALSE;	    
	  else
	    return TRUE;
	}
      else
	return TRUE;
    }
  else
    return TRUE;  
}
Example #20
static void computeAllLHs(tree *tr, analdef *adef, char *bootStrapFileName)
{
  int 
    numberOfTrees = 0,   
    i;
  int ch; /* int, not char: getc() returns an int, so the comparison against EOF stays valid */
  double 
    bestLH = unlikely;    
  bestlist *bestT;
  FILE *infoFile, *result;
  

  infoFile = fopen(infoFileName, "a");
  result   = fopen(resultFileName, "w");

  bestT = (bestlist *) malloc(sizeof(bestlist));
  bestT->ninit = 0;
  initBestTree(bestT, 1, tr->mxtips);

  allocNodex(tr, adef); 

  INFILE = fopen(bootStrapFileName, "r");       
  while((ch = getc(INFILE)) != EOF)
    {
      if(ch == ';')
	numberOfTrees++;
    }	 
  rewind(INFILE);
 
  printf("\n\nFound %d trees in File %s\n\n", numberOfTrees, bootStrapFileName);
  fprintf(infoFile, "\n\nBB Found %d trees in File %s\n\n", numberOfTrees, bootStrapFileName);
 
  for(i = 0; i < numberOfTrees; i++)
    {              
      treeReadLen(INFILE, tr, adef);      
      
      if(i == 0)
	{
	  modOpt(tr, adef);
	  printf("Model optimization, first Tree: %f\n", tr->likelihood);
	  fprintf(infoFile, "Model optimization, first Tree: %f\n", tr->likelihood);
	  bestLH = tr->likelihood;
	  resetBranches(tr);
	}
      
      treeEvaluate(tr, 2);
      Tree2String(tr->tree_string, tr, tr->start->back, TRUE, TRUE, FALSE, FALSE, 
		  TRUE, adef, SUMMARIZE_LH);
                 
      fprintf(result, "%s", tr->tree_string);
      
      saveBestTree(bestT, tr);

      if(tr->likelihood > bestLH)		
	bestLH   = tr->likelihood;	
      printf("Tree %d Likelihood %f\n", i, tr->likelihood);
      fprintf(infoFile, "Tree %d Likelihood %f\n", i, tr->likelihood);
    }        
    
  recallBestTree(bestT, 1, tr);  
  evaluateGeneric(tr, tr->start);
  printf("Model optimization, %f <-> %f\n", bestLH, tr->likelihood); 
  fprintf(infoFile, "Model optimization, %f <-> %f\n", bestLH, tr->likelihood); 
  modOpt(tr, adef);
  treeEvaluate(tr, 2);
  printf("Model optimization, %f <-> %f\n", bestLH, tr->likelihood);
  fprintf(infoFile, "Model optimization, %f <-> %f\n", bestLH, tr->likelihood); 

  printf("\nAll evaluated trees with branch lengths written to File: %s\n", resultFileName);
  fprintf(infoFile, "\nAll evaluated trees with branch lengths written to File: %s\n", resultFileName);

  fclose(INFILE); 
  fclose(infoFile);
  fclose(result);
  exit(0);
}
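
The tree-counting loop in computeAllLHs() relies on every Newick tree ending in ';'. The standalone sketch below shows the same idiom; note that the accumulator has to be an int rather than a char, because getc() returns an int and the comparison against EOF must not be truncated.

#include <stdio.h>

static int countTrees(const char *fileName)
{
  FILE *f = fopen(fileName, "r");
  int ch, trees = 0;

  if(!f)
    return -1;                    /* file could not be opened */

  while((ch = getc(f)) != EOF)    /* every Newick tree is terminated by ';' */
    if(ch == ';')
      trees++;

  fclose(f);
  return trees;
}

int main(int argc, char *argv[])
{
  if(argc == 2)
    printf("Found %d trees in file %s\n", countTrees(argv[1]), argv[1]);
  return 0;
}
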
void computeAncestralStates(tree *tr, double referenceLikelihood, analdef *adef)
{
  int 
    counter = 0;
  
  char 
    treeFileName[2048],
    ancestralProbsFileName[2048],
    ancestralStatesFileName[2048];

  FILE
    *treeFile,
    *probsFile,
    *statesFile;

#ifdef _USE_PTHREADS
  tr->ancestralStates = (double*)malloc(getContiguousVectorLength(tr) * sizeof(double));
#endif

  /*  assert(!adef->compressPatterns);*/

  strcpy(ancestralProbsFileName,         workdir);
  strcpy(ancestralStatesFileName,         workdir);
  strcpy(treeFileName,         workdir);

  strcat(ancestralProbsFileName,         "RAxML_marginalAncestralProbabilities.");
  strcat(ancestralStatesFileName,        "RAxML_marginalAncestralStates.");
  strcat(treeFileName,                   "RAxML_nodeLabelledRootedTree.");

  strcat(ancestralProbsFileName,         run_id);
  strcat(ancestralStatesFileName,        run_id);
  strcat(treeFileName,                   run_id);
  
  probsFile = myfopen(ancestralProbsFileName,   "w");
  statesFile = myfopen(ancestralStatesFileName, "w");
  treeFile   = myfopen(treeFileName,            "w");

  assert(tr->leftRootNode == tr->rightRootNode->back);
  
  computeAncestralRec(tr, tr->leftRootNode, &counter, probsFile, statesFile, FALSE);
  computeAncestralRec(tr, tr->rightRootNode, &counter, probsFile, statesFile, FALSE);
  computeAncestralRec(tr, tr->rightRootNode, &counter, probsFile, statesFile, TRUE);
  
  evaluateGeneric(tr, tr->rightRootNode);

  if(fabs(tr->likelihood - referenceLikelihood) > 0.5)
    {
      printf("Something suspiciuous is going on with the marginal ancestral probability computations\n");
      assert(0);
    } 
  
  assert(counter == tr->mxtips - 1);
    
  ancestralTree(tr->tree_string, tr);

  fprintf(treeFile, "%s\n", tr->tree_string);

  fclose(probsFile);
  fclose(statesFile);
  fclose(treeFile);

  printBothOpen("Marginal Ancestral Probabilities written to file:\n%s\n\n", ancestralProbsFileName);
  printBothOpen("Ancestral Sequences based on Marginal Ancestral Probabilities written to file:\n%s\n\n", ancestralStatesFileName); 
  printBothOpen("Node-laballed ROOTED tree written to file:\n%s\n", treeFileName);
}
Example #22
void mcmc(tree *tr, analdef *adef)
{
  int i=0;

  tr->startLH = tr->likelihood;
  printBothOpen("start minimalistic search with LH %f\n", tr->likelihood);
  printBothOpen("tr LH %f, startLH %f\n", tr->likelihood, tr->startLH);
  
  int insert_id;
  int j;

  int maxradius = 30;
  int accepted_spr = 0, accepted_nni = 0, accepted_bl = 0, accepted_model = 0, accepted_gamma = 0, inserts = 0;
  int rejected_spr = 0, rejected_nni = 0, rejected_bl = 0, rejected_model = 0, rejected_gamma = 0;
  int num_moves = 10000;
  boolean proposalAccepted;
  boolean proposalSuccess;
  prop which_proposal;
  double testr;
  double acceptance;

  srand (440);
  double totalTime = 0.0, proposalTime = 0.0, blTime = 0.0, printTime = 0.0;
  double t_start = gettime();
  double t;


  //allocate states
  double bl_prior_exp_lambda = 0.1;
  double bl_sliding_window_w = 0.005;
  double gm_sliding_window_w = 0.75;
  double rt_sliding_window_w = 0.5;
  state *curstate = state_init(tr, adef, maxradius, bl_sliding_window_w, rt_sliding_window_w, gm_sliding_window_w, bl_prior_exp_lambda);
  printStateFileHeader(curstate);
  set_start_bl(curstate);
  printf("start bl_prior: %f\n",curstate->bl_prior);
  set_start_prior(curstate);
  curstate->hastings = 1;//needs to be set by the proposal when necessary

  /* Set the starting LH with a full traversal */
  evaluateGeneric(tr, tr->start, TRUE);	 
  tr->startLH = tr->likelihood;
  printBothOpen("Starting with tr LH %f, startLH %f\n", j, tr->likelihood, tr->startLH);

  /* Set reasonable model parameters */
  evaluateGeneric(curstate->tr, curstate->tr->start, FALSE); // just for validation 
  printBothOpen("tr LH before modOpt %f\n",curstate->tr->likelihood);
  printSubsRates(curstate->tr, curstate->model, curstate->numSubsRates);

  /* optimize the model with Brents method for reasonable starting points */
  modOpt(curstate->tr, curstate->adef, 5.0); /* not by proposal, just using std raxml machinery... */
  evaluateGeneric(curstate->tr, curstate->tr->start, FALSE); // just for validation 
  printBothOpen("tr LH after modOpt %f\n",curstate->tr->likelihood);
  printSubsRates(curstate->tr, curstate->model, curstate->numSubsRates);
  recordSubsRates(curstate->tr, curstate->model, curstate->numSubsRates, curstate->curSubsRates);

  int first = 1;
  /* beginning of the MCMC chain */
  for(j=0; j<num_moves; j++)
  {
    //printBothOpen("iter %d, tr LH %f, startLH %f\n",j, tr->likelihood, tr->startLH);
    //printRecomTree(tr, TRUE, "startiter");
    proposalAccepted = FALSE;
    t = gettime(); 

    /*
      evaluateGeneric(tr, tr->start); // just for validation 
      printBothOpen("before proposal, iter %d tr LH %f, startLH %f\n", j, tr->likelihood, tr->startLH);
    */

    which_proposal = proposal(curstate);
    if (first == 1)
    {
      first = 0;
      curstate->curprior = curstate->newprior;
    }
   //printBothOpen("proposal done, iter %d tr LH %f, startLH %f\n", j, tr->likelihood, tr->startLH);
    assert(which_proposal == SPR || which_proposal == stNNI ||
           which_proposal == UPDATE_ALL_BL || 
           which_proposal == UPDATE_MODEL || which_proposal == UPDATE_GAMMA);
    proposalTime += gettime() - t;
    /* decide upon acceptance */
    testr = (double)rand()/(double)RAND_MAX;
    //should look something like 
    acceptance = fmin(1,(curstate->hastings) * 
		      (exp(curstate->newprior-curstate->curprior)) * (exp(curstate->tr->likelihood-curstate->tr->startLH)));
    
    /*
      //printRecomTree(tr, FALSE, "after proposal");
      printBothOpen("after proposal, iter %d tr LH %f, startLH %f\n", j, tr->likelihood, tr->startLH);
    */
    if(testr < acceptance)
    {
      proposalAccepted = TRUE;

      switch(which_proposal)
	{
	case SPR:      
	  //printRecomTree(tr, TRUE, "after accepted");
	  // printBothOpen("SPR new topology , iter %d tr LH %f, startLH %f\n", j, tr->likelihood, tr->startLH);
	   accepted_spr++;
	  break;
	case stNNI:	  
	  printBothOpen("NNI new topology , iter %d tr LH %f, startLH %f\n", j, tr->likelihood, tr->startLH);
	  accepted_nni++;
	  break;
	case UPDATE_ALL_BL:	  
	  //      printBothOpen("BL new , iter %d tr LH %f, startLH %f\n", j, tr->likelihood, tr->startLH);
	  accepted_bl++;
	  break;
	case UPDATE_MODEL:      
	  //	printBothOpen("Model new, iter %d tr LH %f, startLH %f\n", j, tr->likelihood, tr->startLH);
	  accepted_model++;
	  break;
	case UPDATE_GAMMA:      
	  //	printBothOpen("Gamma new, iter %d tr LH %f, startLH %f\n", j, tr->likelihood, tr->startLH);
	  accepted_gamma++;
	  break;
	default:
	  assert(0);
	}

      curstate->tr->startLH = curstate->tr->likelihood;  //new LH
      curstate->curprior = curstate->newprior;          
    }
    else
    {
      //printBothOpen("rejected , iter %d tr LH %f, startLH %f, %i \n", j, tr->likelihood, tr->startLH, which_proposal);
      resetState(which_proposal,curstate);
      
      switch(which_proposal)
	{
	case SPR:
	  rejected_spr++;
	  break;
	case stNNI:
	  rejected_nni++;
	  break;
	case UPDATE_ALL_BL:
	  rejected_bl++;
	  break;
	case UPDATE_MODEL:
	  rejected_model++;
	  break;
	case UPDATE_GAMMA:
	  rejected_gamma++;
	  break;
	default:
	  assert(0);
	}
      
      evaluateGeneric(tr, tr->start, FALSE); 
      
      // just for validation 

      if(fabs(curstate->tr->startLH - tr->likelihood) > 1.0E-10)
      {
        printBothOpen("WARNING: LH diff %.10f\n", curstate->tr->startLH - tr->likelihood);
      }
      //printRecomTree(tr, TRUE, "after reset");
      //printBothOpen("after reset, iter %d tr LH %f, startLH %f\n", j, tr->likelihood, tr->startLH);
      assert(fabs(curstate->tr->startLH - tr->likelihood) < 1.0E-10);
    }       
    inserts++;
    
    /* need to print status */
    if (j % 50 == 0)
    {
      t = gettime(); 
      printBothOpen("sampled at iter %d, tr LH %f, startLH %f, prior %f, incr %f\n",j, tr->likelihood, tr->startLH, curstate->curprior, tr->likelihood - tr->startLH);
      boolean printBranchLengths = TRUE;
      /*printSimpleTree(tr, printBranchLengths, adef);*/
      //TODO: print some parameters to a file 
      printStateFile(j,curstate);
      printTime += gettime() - t;
    }
  }

  t = gettime(); 
  treeEvaluate(tr, 1);
  blTime += gettime() - t;
  printBothOpen("accepted SPR %d, accepted stNNI %d, accepted BL %d, accepted model %d, accepted gamma %d, num moves tried %d, SPRs with max radius %d\n", 
		accepted_spr, accepted_nni, accepted_bl, accepted_model, accepted_gamma, num_moves, maxradius);
  printBothOpen("rejected SPR %d, rejected stNNI %d, rejected BL %d, rejected model %d, rejected gamma %d\n",
		rejected_spr, rejected_nni, rejected_bl, rejected_model, rejected_gamma);
  printBothOpen("ratio SPR %f, ratio stNNI %f,  ratio BL %f, ratio model %f, ratio gamma %f\n",
		accepted_spr/(double)(rejected_spr+accepted_spr), accepted_nni/(double)(rejected_nni+accepted_nni), accepted_bl/(double)(rejected_bl+accepted_bl), 
		accepted_model/(double)(rejected_model+accepted_model), accepted_gamma/(double)(rejected_gamma+accepted_gamma));
  printBothOpen("total  %f, BL %f, printing %f, proposal %f\n", gettime()- t_start, blTime, printTime, proposalTime);
  assert(inserts == num_moves);
  state_free(curstate);
}
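
The acceptance step inside the chain above combines the Hastings ratio with the prior and likelihood ratios. Reduced to a standalone sketch with plain doubles (no tree structures), the decision looks like this; the quantities mirror curstate->hastings, the prior terms and the log likelihoods used in mcmc().

#include <math.h>
#include <stdio.h>
#include <stdlib.h>

static int acceptMove(double hastings, double curPrior, double newPrior,
                      double curLH, double newLH)
{
  /* same form as in mcmc(): min(1, hastings * exp(dPrior) * exp(dLH)) */
  double acceptance = fmin(1.0, hastings * exp(newPrior - curPrior) * exp(newLH - curLH));
  double testr      = (double)rand() / (double)RAND_MAX;

  return testr < acceptance;      /* 1 = accept, 0 = reject */
}

int main(void)
{
  srand(440);
  printf("accepted: %d\n", acceptMove(1.0, -10.0, -9.5, -5000.0, -4999.0));
  return 0;
}
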
Example #23
static boolean stNNIproposal(state *s)
{
  //s->newprior = 1;
  s->bl_prior = 0;
  int attempts = 0;
  do{
    s->p = selectRandomInnerSubtree(s->tr); /* TODOFER do this ad hoc for NNI requirements*/
    if (++attempts > 500)
      return FALSE;
  }while(isTip(s->p->number, s->tr->mxtips) || isTip(s->p->back->number, s->tr->mxtips));
  assert(!isTip(s->p->number, s->tr->mxtips));
  nodeptr 
    p = s->p,
    q = s->p->back,
    pb1 = s->p->next->back,
    pb2 = s->p->next->next->back;
  assert(!isTip(q->number, s->tr->mxtips));
  nodeptr
    qb1 = q->next->back,
    qb2 = q->next->next->back;

  recordNNIBranchInfo(p, s->tr->numBranches);
  /* do only one type of NNI, nni1 */
  double randprop = (double)rand()/(double)RAND_MAX;
  boolean changeBL = TRUE;
  if (randprop < 1.0 / 3.0)
  {
    s->whichNNI = 1;
    if(!changeBL)
    {
      hookup(p, q, p->z, s->tr->numBranches);
      hookup(p->next,       qb1, q->next->z, s->tr->numBranches);
      hookup(p->next->next, pb2, p->next->next->z, s->tr->numBranches);
      hookup(q->next,       pb1, p->next->z, s->tr->numBranches);
      hookup(q->next->next, qb2, q->next->next->z, s->tr->numBranches);
    }
    else
    {
      hookupBL(p, q, p, s);
      hookupBL(p->next,       qb1, q->next, s);
      hookupBL(p->next->next, pb2, p->next->next, s);
      hookupBL(q->next,       pb1, p->next, s);
      hookupBL(q->next->next, qb2, q->next->next, s);
    }
  }
  else if (randprop < 2.0 / 3.0)
  {
    s->whichNNI = 2;
    if(!changeBL)
    {
      hookup(p, q, p->z, s->tr->numBranches);
      hookup(p->next,       pb1, p->next->z, s->tr->numBranches);
      hookup(p->next->next, qb1, q->next->z, s->tr->numBranches);
      hookup(q->next,       pb2, p->next->next->z, s->tr->numBranches);
      hookup(q->next->next, qb2, q->next->next->z, s->tr->numBranches);
    }
    else
    {
      hookupBL(p, q, p, s);
      hookupBL(p->next,       pb1, p->next, s);
      hookupBL(p->next->next, qb1, q->next, s);
      hookupBL(q->next,       pb2, p->next->next, s);
      hookupBL(q->next->next, qb2, q->next->next, s);
    }
  }
  else
  {
    /* change only the branch lengths */
    s->whichNNI = 0; 
    if(changeBL)
    {
      /* do it like this for symmetry */
      hookupBL(p, q, p, s);
      hookupBL(p->next,       pb1, p->next, s);
      hookupBL(p->next->next, pb2, p->next->next, s);
      hookupBL(q->next,       qb1, q->next, s);
      hookupBL(q->next->next, qb2, q->next->next, s);
    }
  }

  newviewGeneric(s->tr, p, FALSE);
  newviewGeneric(s->tr, p->back, FALSE);
  evaluateGeneric(s->tr, p, FALSE);
  return TRUE;
}
Example #24
int main (int argc, char * argv[])
{
  pllAlignmentData *alignmentData1, *alignmentData2;
  pllInstance * tr, *tr2;
  pllNewickTree * newick;
  partitionList * partitions, *partitions2;
  struct pllQueue * parts;
  int i;

  if (argc != 4)
   {
     fprintf (stderr, "usage: %s [phylip-file] [newick-file] [partition-file]\n", argv[0]);
     return (EXIT_FAILURE);
   }

  /* Create a PLL tree */
  tr = pllCreateInstance (GAMMA, PLL_FALSE, PLL_FALSE, PLL_FALSE, 12345);
  tr2 = pllCreateInstance (GAMMA, PLL_FALSE, PLL_FALSE, PLL_FALSE, 12345);

  /* Parse a PHYLIP file */
  alignmentData1= pllParsePHYLIP (argv[1]);
  alignmentData2 = pllParsePHYLIP (argv[1]);

  if (!alignmentData1)
   {
     fprintf (stderr, "Error while parsing %s\n", argv[1]);
     return (EXIT_FAILURE);
   }

  /* Parse a NEWICK file */
  newick = pllNewickParseFile (argv[2]);

  if (!newick)
   {
     fprintf (stderr, "Error while parsing newick file %s\n", argv[2]);
     return (EXIT_FAILURE);
   }

  if (!pllValidateNewick (newick))  /* check whether the valid newick tree is also a tree that can be processed with our nodeptr structure */
   {
     fprintf (stderr, "Invalid phylogenetic tree\n");
     return (EXIT_FAILURE);
   }

  /* Parse the partitions file into a partition queue structure */
  parts = pllPartitionParse (argv[3]);

  /* Validate the partitions */
  if (!pllPartitionsValidate (parts, alignmentData1))
   {
     fprintf (stderr, "Error: Partitions do not cover all sites\n");
     return (EXIT_FAILURE);
   }

  /* commit the partitions and build a partitions structure */
  partitions = pllPartitionsCommit (parts, alignmentData1);
  partitions2 =  pllPartitionsCommit (parts, alignmentData2);

  /* destroy the intermediate partition queue structure */
  pllQueuePartitionsDestroy (&parts);

  /* eliminate duplicate sites from the alignment and update weights vector */
  pllPhylipRemoveDuplicate (alignmentData1, partitions);
  pllPhylipRemoveDuplicate (alignmentData2, partitions2);


  /* Set the topology of the PLL tree from a parsed newick tree */
  //pllTreeInitTopologyNewick (tr, newick, PLL_TRUE);
  /* Or instead of the previous function use the next commented line to create
     a random tree topology
  pllTreeInitTopologyRandom (tr, phylip->nTaxa, phylip->label); */

  pllTreeInitTopologyForAlignment(tr, alignmentData1);

  /* Connect the alignment with the tree structure */
  if (!pllLoadAlignment (tr, alignmentData1, partitions, PLL_DEEP_COPY))
   {
     fprintf (stderr, "Incompatible tree/alignment combination\n");
     return (EXIT_FAILURE);
   }

  /* Initialize the model TODO: Put the parameters in a logical order and change the TRUE to flags */
 pllInitModel(tr, alignmentData1, partitions);

  /* TODO transform into pll functions !*/

  /*
     allocateParsimonyDataStructures(tr, partitions);
     pllMakeParsimonyTreeFast(tr, partitions);
     pllFreeParsimonyDataStructures(tr, partitions);
  */

  pllComputeRandomizedStepwiseAdditionParsimonyTree(tr, partitions);
  pllTreeToNewick (tr->tree_string, tr, partitions, tr->start->back, PLL_TRUE, PLL_TRUE, PLL_FALSE, PLL_FALSE, PLL_FALSE, PLL_SUMMARIZE_LH, PLL_FALSE, PLL_FALSE);
  printf ("Tree: %s %d\n", tr->tree_string, tr->start->number);
  evaluateGeneric(tr, partitions, tr->start, PLL_TRUE, PLL_FALSE);

  double
    firstTree = tr->likelihood;

  printf("%f \n", tr->likelihood);
  //computeBIGRAPID_Test(tr, partitions, PLL_TRUE);
  printf("final like %f\n", tr->likelihood);
  //pllInitModel(tr, PLL_TRUE, phylip, partitions);

  pllTreeInitTopologyNewick (tr2, newick, PLL_TRUE);
  if (!pllLoadAlignment (tr2, alignmentData2, partitions2, PLL_DEEP_COPY))
   {
     fprintf (stderr, "Incompatible tree/alignment combination\n");
     return (EXIT_FAILURE);
   }
  pllInitModel(tr2, alignmentData2, partitions2);

  pllTreeToNewick (tr2->tree_string, tr2, partitions2, tr2->start->back, PLL_TRUE, PLL_TRUE, PLL_FALSE, PLL_FALSE, PLL_FALSE, PLL_SUMMARIZE_LH, PLL_FALSE, PLL_FALSE);
  printf ("Tree: %s %d\n", tr2->tree_string, tr2->start->number);
  evaluateGeneric(tr2, partitions2, tr2->start, PLL_TRUE, PLL_FALSE);

  printf("%f \n", tr2->likelihood);

  double
    secondTree = tr2->likelihood;

  assert(firstTree == secondTree);

  pllOptimizeModelParameters(tr2, partitions2, 10.0);

  printf("%f \n", tr2->likelihood);

  pllAlignmentDataDestroy (alignmentData1);
  pllNewickParseDestroy (&newick);

  pllPartitionsDestroy (tr, &partitions);
  pllTreeDestroy (tr);

  pllAlignmentDataDestroy (alignmentData2); 
  pllPartitionsDestroy (&partitions2, tr2->mxtips);
  pllTreeDestroy (tr2);



  for(i = 0; i < 5; i++)
    {
      //write a simple partition file with 3 partitions 
      //for dataset dna.phy.dat contained 
      //in this source directory 
      
      FILE *f = fopen("dummy", "w");
      
      fprintf(f, "DNA, p1 = 1-200\n");
      fprintf(f, "DNA, p1 = 201-400\n");
      fprintf(f, "DNA, p1 = 401-705\n");
      
      fclose(f);
      
      tr = pllCreateInstance (GAMMA, PLL_FALSE, PLL_FALSE, PLL_FALSE, 12345);
      
      alignmentData1= pllParsePHYLIP (argv[1]);
      
      newick = pllNewickParseFile (argv[2]);
      
      parts = pllPartitionParse ("dummy");
      
      /* Validate the partitions */
      if (!pllPartitionsValidate (parts, alignmentData1))
	{
	  fprintf (stderr, "Error: Partitions do not cover all sites\n");
	  return (EXIT_FAILURE);
	}
      
      /* commit the partitions and build a partitions structure */
      partitions = pllPartitionsCommit (parts, alignmentData1);
      
      /* destroy the intermediate partition queue structure */
      pllQueuePartitionsDestroy (&parts);
      
      /* eliminate duplicate sites from the alignment and update weights vector */
      pllPhylipRemoveDuplicate (alignmentData1, partitions);
      
      pllTreeInitTopologyNewick (tr, newick, PLL_TRUE);
      if (!pllLoadAlignment (tr, alignmentData1, partitions, PLL_DEEP_COPY))
	{
	  fprintf (stderr, "Incompatible tree/alignment combination\n");
	  return (EXIT_FAILURE);
	}
      pllInitModel(tr, alignmentData1, partitions);
      
      switch(i)
	{
	case 0:
	  //link params in one way 
	  
	  pllLinkAlphaParameters("0,1,2", partitions);
	  pllLinkFrequencies("0,1,2", partitions);
	  pllLinkRates("0,1,2", partitions);	
	  break;
	case 1:
	  //link params in another way 
	  
	  pllLinkAlphaParameters("0,0,0", partitions);
	  pllLinkFrequencies("0,1,2", partitions);
	  pllLinkRates("0,1,2", partitions);    
	  break;
	case 2:
	  //link params in yet another way 
	  
	  pllLinkAlphaParameters("0,0,0", partitions);
	  pllLinkFrequencies("0,1,2", partitions);
	  pllLinkRates("0,1,0", partitions);    	
	  break;

	case 3:
	  //also fiddle around with the Q matrices, make them to be non-GTR, but simpler
	  
	  pllLinkAlphaParameters("0,1,2", partitions);
	  pllLinkFrequencies("0,1,2", partitions);
	  pllLinkRates("0,1,2", partitions);    
	  
	  //these are GTR models
	  pllSetSubstitutionRateMatrixSymmetries("0,1,2,3,4,5", partitions, 0);	  
	  pllSetSubstitutionRateMatrixSymmetries("0,1,2,3,4,5", partitions, 1);

	  //this is a simpler model with 5 parameters, parameters a and f have 
	  //the same value
	  pllSetSubstitutionRateMatrixSymmetries("0,1,2,3,4,0", partitions, 2);
	  break;

	case 4:
	  {
	    //test case to show how the model parameters can be set to fixed values

	    // set up arrays of user-defined base frequencies 
	    // and a user defined q matrix 
	    double 
	      f[4] = {0.25, 0.25, 0.25, 0.25},
	      q[6] = {1.0, 1.0, 1.0, 1.0, 1.0, 0.5};
	    
	      //unlink alpha parameters, base frequencies and Q matrices 
	      //across all partitions
	    pllLinkAlphaParameters("0,1,2", partitions);
	    pllLinkFrequencies("0,0,1", partitions);
	    pllLinkRates("0,1,2", partitions);
	    
	    //set alpha to a fixed value of 1.0 for partition 0 and 
	    //partition 1
	    pllSetFixedAlpha(1.0, 0, partitions, tr);
	    pllSetFixedAlpha(1.0, 1, partitions, tr);

	    //fix the base frequencies to 0.25 for 
	    //partitions 0 and 1
	    pllSetFixedBaseFrequencies(f, 4, 0, partitions, tr);
	    pllSetFixedBaseFrequencies(f, 4, 1, partitions, tr);
	    
	    //set the Q matrix to fixed values for partition 
	    //0
	    pllSetFixedSubstitutionMatrix(q, 6, 0, partitions, tr);	    	    
	  }	
	  break;
	default:
	  assert(0);
	}
      
      evaluateGeneric(tr, partitions, tr->start, PLL_TRUE, PLL_FALSE);
      printf("%f \n", tr->likelihood);
      pllOptimizeModelParameters(tr, partitions, 10.0);

      //print the model parameters 

      printModelParameters(partitions);

      printf("%f \n", tr->likelihood); 
      //cleanup
      pllAlignmentDataDestroy (alignmentData1);
      pllNewickParseDestroy (&newick);
      
      pllPartitionsDestroy (&partitions, tr->mxtips);
      pllTreeDestroy (tr);      
    }
  
  testProteinStuff();

  return (EXIT_SUCCESS);
}
Example #25
static void testProteinStuff()
{
  pllAlignmentData * alignmentData;
  pllInstance * tr;
  pllNewickTree * newick;
  
  partitionList * partitions;
  
  struct pllQueue * parts;
  
  int i;
  
  for(i = 0; i < 5; i++)
    {
      //write a simple partition file with 3 partitions 
      //for the protein dataset prot.phy contained 
      //in this source directory 
      
      FILE *f = fopen("proteinPartitions", "w");
      
      switch(i)
	{
	case 0:
	  fprintf(f, "WAG, p1 = 1-200\n");
	  fprintf(f, "WAG, p2 = 201-600\n");
	  fprintf(f, "WAG, p3 = 601-1104\n");
	  break;
	case 1:
	  fprintf(f, "LG, p1 = 1-200\n");
	  fprintf(f, "LG, p2 = 201-600\n");
	  fprintf(f, "LG, p3 = 601-1104\n");
	  break;
	case 2:
	  fprintf(f, "JTT, p1 = 1-200\n");
	  fprintf(f, "JTT, p2 = 201-600\n");
	  fprintf(f, "JTT, p3 = 601-1104\n");
	  break;
	case 3:	  
	case 4:
	  fprintf(f, "GTR, p1 = 1-200\n");
	  fprintf(f, "GTR, p2 = 201-600\n");
	  fprintf(f, "GTR, p3 = 601-1104\n");
	  break;
	default:
	  assert(0);
	}
      
      fclose(f);
      
      tr = pllCreateInstance (GAMMA, PLL_FALSE, PLL_FALSE, PLL_FALSE, 12345);
      
      alignmentData = pllParsePHYLIP ("prot.phy");

      /* or alternatively, parse a FASTA file */
      // alignmentData = pllParseFASTA ("prot.phy");
      
      newick = pllNewickParseFile("parsimonyTree");
      
      parts = pllPartitionParse ("proteinPartitions");
      
      /* Validate the partitions */
      if (!pllPartitionsValidate (parts, alignmentData))
	{
	  fprintf (stderr, "Error: Partitions do not cover all sites\n");
	  return;
	}
      
      /* commit the partitions and build a partitions structure */
      partitions = pllPartitionsCommit (parts, alignmentData);
      
      /* destroy the intermediate partition queue structure */
      pllQueuePartitionsDestroy (&parts);
      
      /* eliminate duplicate sites from the alignment and update weights vector */
      pllPhylipRemoveDuplicate (alignmentData, partitions);
      
      pllTreeInitTopologyNewick (tr, newick, PLL_TRUE);
      if (!pllLoadAlignment (tr, alignmentData, partitions, PLL_DEEP_COPY))
	{
	  fprintf (stderr, "Incompatible tree/alignment combination\n");
	  return;
	}
      //pllInitModel(tr, PLL_TRUE, alignmentData, partitions);
      pllInitModel(tr, alignmentData, partitions);
      
      switch(i)
	{
	case 0:
	  //all params unlinked 
	  
	  pllLinkAlphaParameters("0,1,2", partitions);
	  pllLinkFrequencies("0,1,2", partitions);
	  pllLinkRates("0,1,2", partitions);	
	  break;
	case 1:
	  //link params in another way 
	  
	  pllLinkAlphaParameters("0,0,0", partitions);
	  pllLinkFrequencies("0,1,2", partitions);
	  pllLinkRates("0,1,2", partitions);    
	  break;
	case 2:
	  //link params in yet another way 
	  
	  pllLinkAlphaParameters("0,0,0", partitions);
	  pllLinkFrequencies("0,1,2", partitions);
	  pllLinkRates("0,1,0", partitions);    	
	  break;

	case 3:
	  //also fiddle around with the Q matrices, make them to be non-GTR, but simpler
	  
	  pllLinkAlphaParameters("0,1,2", partitions);
	  pllLinkFrequencies("0,1,2", partitions);
	  pllLinkRates("0,1,2", partitions);    
	  
	  //these are GTR models
	  //pllSetSubstitutionRateMatrixSymmetries("0,1,2,3,4,5", partitions, 0);	  
	  //pllSetSubstitutionRateMatrixSymmetries("0,1,2,3,4,5", partitions, 1);

	  //this is a simpler model with 5 parameters, parameters a and f have 
	  //the same value
	  //pllSetSubstitutionRateMatrixSymmetries("0,1,2,3,4,0", partitions, 2);
	  break;

	case 4:
	  {
	    //test case to show how the model parameters can be set to fixed values

	    // set up arrays of user-defined base frequencies 
	    // and a user defined q matrix 
	    double 
	      f[4] = {0.25, 0.25, 0.25, 0.25},
	      q[6] = {1.0, 1.0, 1.0, 1.0, 1.0, 0.5};
	    
	      //unlink alpha parameters, base frequencies and Q matrices 
	      //across all partitions
	    pllLinkAlphaParameters("0,1,2", partitions);
	    pllLinkFrequencies("0,1,2", partitions);
	    pllLinkRates("0,0,0", partitions);
	    
	    //set alpha to a fixed value of 1.0 for partition 0 and 
	    //partition 1
	    //pllSetFixedAlpha(1.0, 0, partitions, tr);
	    //pllSetFixedAlpha(1.0, 1, partitions, tr);

	    //fix the base frequencies to 0.25 for 
	    //partitions 0 and 1
	    //pllSetFixedBaseFrequencies(f, 4, 0, partitions, tr);
	    //pllSetFixedBaseFrequencies(f, 4, 1, partitions, tr);
	    
	    //set the Q matrix to fixed values for partition 
	    //0
	    //pllSetFixedSubstitutionMatrix(q, 6, 0, partitions, tr);	    	    
	  }	  
	  break;
	default:
	  assert(0);
	}
      
      evaluateGeneric(tr, partitions, tr->start, PLL_TRUE, PLL_FALSE);
      printf("%f \n", tr->likelihood);
      pllOptimizeModelParameters(tr, partitions, 1.0);

      //print the model parameters 

      //printModelParameters(partitions);

      printf("%f \n", tr->likelihood); 
      //cleanup
      pllAlignmentDataDestroy (alignmentData);
      pllNewickParseDestroy (&newick);
      
      pllPartitionsDestroy (tr, &partitions);
      pllTreeDestroy (tr);      
    }
}
boolean update(tree *tr, nodeptr p)
{       
  nodeptr  q; 
  boolean smoothedPartitions[NUM_BRANCHES];
  int i;
  double   z[NUM_BRANCHES], z0[NUM_BRANCHES];
  double _deltaz;

#ifdef _DEBUG_UPDATE
  double 
    startLH;

  evaluateGeneric(tr, p);

  startLH = tr->likelihood;
#endif

  q = p->back;   

  for(i = 0; i < tr->numBranches; i++)
    z0[i] = q->z[i];    

  if(tr->numBranches > 1)
    makenewzGeneric(tr, p, q, z0, newzpercycle, z, TRUE);  
  else
    makenewzGeneric(tr, p, q, z0, newzpercycle, z, FALSE);
  
  for(i = 0; i < tr->numBranches; i++)    
    smoothedPartitions[i]  = tr->partitionSmoothed[i];
      
  for(i = 0; i < tr->numBranches; i++)
    {         
      if(!tr->partitionConverged[i])
	{	  
	    _deltaz = deltaz;
	    
	  if(ABS(z[i] - z0[i]) > _deltaz)  
	    {	      
	      smoothedPartitions[i] = FALSE;       
	    }	 
	  	  
	  p->z[i] = q->z[i] = z[i];	 
	}
    }
  
#ifdef _DEBUG_UPDATE
  evaluateGeneric(tr, p);

  if(tr->likelihood <= startLH)
    {
      if(fabs(tr->likelihood - startLH) > 0.01)
	{
	  printf("%f %f\n", startLH, tr->likelihood);
	  assert(0);      
	}
    }
#endif

  for(i = 0; i < tr->numBranches; i++)    
    tr->partitionSmoothed[i]  = smoothedPartitions[i];
  
  return TRUE;
}
Example #27
int main(int argc, char * argv[])
{

  tree        * tr;

  if (argc != 2)
   {
     fprintf (stderr, "syntax: %s [binary-alignment-file]\n", argv[0]);
     return (1);
   }
  tr = (tree *)malloc(sizeof(tree));

  /* read the binary input, setup tree, initialize model with alignment */
  read_msa(tr,argv[1]);
  tr->randomNumberSeed = 665;
  makeRandomTree(tr);
  printf("Number of taxa: %d\n", tr->mxtips);
  printf("Number of partitions: %d\n", tr->NumberOfModels);


  /* compute the LH of the full tree */
  printf ("Virtual root: %d\n", tr->start->number);
  evaluateGeneric(tr, tr->start, TRUE);
  printf("Likelihood: %f\n", tr->likelihood);

  /* one round of branch length optimization */
  smoothTree(tr, 1);
  evaluateGeneric(tr, tr->start, TRUE);
  printf("Likelihood after branch length optimization: %.20f\n", tr->likelihood);



  /* Now we show how to find a particular LH vector for a node */
  int i;
  int node_number = tr->mxtips + 1;
  nodeptr p = tr->nodep[node_number];
  printf("Pointing to  node %d\n", p->number);

  /* Fix p as the virtual root */
  newviewGeneric(tr, p, FALSE);
  newviewGeneric(tr, p->back, FALSE);
  evaluateGeneric(tr, p, FALSE);
  printf("Likelihood : %.f\n", tr->likelihood);

  printf("Make a copy of LH vector for node  %d\n", p->number);
  likelihood_vector *vector = copy_likelihood_vectors(tr, p);
  for(i=0; i<vector->num_partitions; i++)
     printf("Partition %d requires %d bytes\n", i, (int)vector->partition_sizes[i]);

  /* Check we have the same vector in both tree and copied one */
  assert(same_vector(tr, p, vector));

  /* Now force the p to get a new value (generally branch lengths are NOT updated like this) */
  /* This is just an example to show usage (for fast NNI eval), manually updating vectors is not recommended! */
  printf("bl : %.40f\n", p->next->z[0]);
  p->next->z[0] = p->next->back->z[0] = zmin;
  printf("bl : %.40f\n", p->next->z[0]);
  newviewGeneric(tr, p, FALSE);
  assert(!same_vector(tr, p, vector));
  evaluateGeneric(tr, p, FALSE);
  printf("Likelihood : %f\n", tr->likelihood);

  restore_vector(tr, p, vector);
  assert(same_vector(tr, p, vector));
  evaluateGeneric(tr, p, FALSE);
  printf("Likelihood after manually restoring the vector : %f\n", tr->likelihood);

  free_likelihood_vector(vector);

  /* Pick an inner branch */
  printf("numBranches %d \n", tr->numBranches);
  //tr->numBranches = 1;
  p = tr->nodep[tr->mxtips + 1];
  int partition_id = 0; /* single partition */
  double bl = get_branch_length(tr, p, partition_id);
  printf("z value: %f , bl value %f\n", p->z[partition_id], bl);
  /* set the bl to 2.5 */
  double new_bl = 2.5;
  set_branch_length(tr, p, partition_id, new_bl);
  printf("Changed BL to %f\n", new_bl);
  printf("new z value: %f , new bl value %f\n", p->z[partition_id], get_branch_length(tr, p, partition_id));
  /* set back to original */
  printf("Changed to previous BL\n");
  set_branch_length(tr, p, partition_id, bl);
  printf("new z value: %f , new bl value %f\n", p->z[partition_id], get_branch_length(tr, p, partition_id));

  return (0);
}
Example #28
static void resetSimpleModelProposal(state * instate)
{
  restoreSubsRates(instate->tr, instate->adef, instate->model, instate->numSubsRates, instate->curSubsRates);
  evaluateGeneric(instate->tr, instate->tr->start, FALSE);
}