/*
 * Apply one SPR move to a randomly selected subtree and record the move in
 * `instate`.
 *
 * Stored in `instate`: the pruned node (p), the two nodes that were attached
 * to it before pruning (nb, nnb) together with their branch lengths (nbz,
 * nnbz), and the insertion branch q--r with its branch length (qz).
 * The insertion point is taken from tr->insertNode after the calls to
 * parsimonySPR() and removeNodeBIG() (presumably chosen by parsimonySPR();
 * confirm against its definition).
 *
 * The global Thorough must be 0. The likelihood obtained after re-insertion
 * is printed to stdout.
 */
static void doSPR(tree *tr, state *instate)
{
  nodeptr pruned = selectRandomSubtree(tr);

  parsimonySPR(pruned, tr);

  /* what is pruned, and where it hung before the move */
  instate->p   = pruned;
  instate->nb  = pruned->next->back;
  instate->nnb = pruned->next->next->back;

  recordBranchInfo(instate->nb,  instate->nbz,  instate->tr->numBranches);
  recordBranchInfo(instate->nnb, instate->nnbz, instate->tr->numBranches);

  removeNodeBIG(tr, pruned, tr->numBranches);

  /* insertion branch q--r */
  instate->q = tr->insertNode;
  instate->r = instate->q->back;

  recordBranchInfo(instate->q, instate->qz, instate->tr->numBranches);

  assert(Thorough == 0);

  insertBIG(instate->tr, instate->p, instate->q, instate->tr->numBranches);
  evaluateGeneric(instate->tr, instate->p->next->next, FALSE);

  printf("%f \n", tr->likelihood);
}
//simple sliding window static void simpleGammaProposal(state * instate) { //TODO: add safety to max and min values double newalpha, curv, r,mx,mn; curv = instate->tr->partitionData[instate->model].alpha; instate->curAlpha = curv; r = (double)rand()/(double)RAND_MAX; mn = curv-(instate->gm_sliding_window_w/2); mx = curv+(instate->gm_sliding_window_w/2); newalpha = fabs(mn + r * (mx-mn)); /* Ensure always you stay within this range */ if(newalpha > ALPHA_MAX) newalpha = ALPHA_MAX; if(newalpha < ALPHA_MIN) newalpha = ALPHA_MIN; instate->tr->partitionData[instate->model].alpha = newalpha; #ifndef _LOCAL_DISCRETIZATION pllMakeGammaCats(instate->tr->partitionData[instate->model].alpha, instate->tr->partitionData[instate->model].gammaRates, 4); #endif /* TODO: for the parallel version: need to broadcast the gamma rates before re-evaluating !!!! also note the _LOCAL_DISCRETIZATION flag that should only be used for the parallel stuff ! */ evaluateGeneric(instate->tr, instate->tr->start, TRUE); }
/*
 * Rebuild the tree `tr` from the saved topology `tpl`.
 *
 * The back pointers of nodes 1 .. 2*mxtips-2 are cleared by walking each
 * node's `next` ring, then every saved connection in tpl->links is
 * re-established with its branch lengths via hookup(). likelihood, start,
 * ntips and nextnode are copied from `tpl` and the tree is re-evaluated with
 * a full traversal.
 *
 * Always returns TRUE.
 */
static boolean restoreTree (topol *tpl, tree *tr)
{
  int k;
  const int lastNode = 2*(tr->mxtips) - 2;

  /* disconnect everything; the ring walk relies on p = p->next terminating
     at tips as well */
  for (k = 1; k <= lastNode; k++) {
    nodeptr first = tr->nodep[k];
    nodeptr cur   = first;

    do {
      cur->back = (nodeptr) NULL;
      cur = cur->next;
    } while (cur != first);
  }

  /* re-create the connections stored in the topology */
  for (k = 0; k < tpl->nextlink; k++) {
    connptr link = tpl->links + k;

    hookup(link->p, link->q, link->z, tr->numBranches);
  }

  tr->likelihood = tpl->likelihood;
  tr->start      = tpl->start;
  tr->ntips      = tpl->ntips;
  tr->nextnode   = tpl->nextnode;

  evaluateGeneric(tr, tr->start, TRUE);

  return TRUE;
}
/*
 * Smooth the tree with smoothTreeRandom() and then evaluate it from
 * tr->start. The iteration count handed to smoothTreeRandom() is the global
 * `smoothings` scaled by `smoothFactor` and truncated to int.
 */
void treeEvaluateRandom (tree *tr, double smoothFactor)
{
  const int maxIterations = (int)((double)smoothings * smoothFactor);

  smoothTreeRandom(tr, maxIterations);

  evaluateGeneric(tr, tr->start);
}
/*
 * Sliding-window proposal for the substitution rates of partition
 * instate->model.
 *
 * The current rates are first saved with recordSubsRates(). Each of the
 * instate->numSubsRates rates is then replaced by a value drawn uniformly
 * from a window of width instate->rt_sliding_window_w centred on the current
 * rate, reflected at zero with fabs() and clamped to [RATE_MIN, RATE_MAX].
 * Afterwards the rate matrix is re-initialised (unless _LOCAL_DISCRETIZATION
 * is defined) and the tree is re-evaluated.
 *
 * Prior handling is not implemented here. Original design notes: a Dirichlet
 * proposal / independent Gamma priors for the rates, and uniform priors for
 * the state frequencies.
 */
static void simpleModelProposal(state * instate)
{
  int rateIdx;
  double current, proposed;
  double u, lower, upper;

  /* remember the old rates so that the move can be undone */
  recordSubsRates(instate->tr, instate->model, instate->numSubsRates, instate->curSubsRates);

  for (rateIdx = 0; rateIdx < instate->numSubsRates; rateIdx++) {
    current = instate->tr->partitionData[instate->model].substRates[rateIdx];

    u     = (double)rand()/(double)RAND_MAX;
    lower = current-(instate->rt_sliding_window_w/2);
    upper = current+(instate->rt_sliding_window_w/2);

    proposed = fabs(lower + u * (upper-lower));

    /* keep the proposal inside the admissible range */
    if (proposed > RATE_MAX)
      proposed = RATE_MAX;
    if (proposed < RATE_MIN)
      proposed = RATE_MIN;

    editSubsRates(instate->tr, instate->model, rateIdx, proposed);
  }

#ifndef _LOCAL_DISCRETIZATION
  /* 1. recompute eigenvectors, eigenvalues etc. for the Q decomposition */
  pllInitReversibleGTR(instate->tr, instate->model);
#endif

  /* TODO: need to broadcast rates here for the parallel version */

  /* 2. re-traverse the full tree to update all vectors */
  evaluateGeneric(instate->tr, instate->tr->start, TRUE);

  /* TODO: without this second evaluation the run fails after a successful
     model move that is followed by a failing SPR */
  evaluateGeneric(instate->tr, instate->tr->start, FALSE);
}
/*
 * Re-insert subtree `p` into the branch at `q`.
 *
 * With the global Thorough set, the insertion is done with insertBIG() and
 * the tree is evaluated at p->next->next. Otherwise insertRestoreBIG() is
 * used, the likelihood vectors at the two ends of the branch
 * p->next->next -- p->back are recomputed until their `x` flags are set
 * (inner nodes only), and tr->likelihood is set to tr->endLH.
 *
 * Returns FALSE if the insertion routine fails, TRUE otherwise.
 */
boolean testInsertRestoreBIG (tree *tr, nodeptr p, nodeptr q)
{
  if (Thorough) {
    if (! insertBIG(tr, p, q, tr->numBranches))
      return FALSE;

    evaluateGeneric(tr, p->next->next);
    return TRUE;
  }

  if (! insertRestoreBIG(tr, p, q))
    return FALSE;

  {
    nodeptr x = p->next->next;
    nodeptr y = p->back;
    const boolean xInner = !isTip(x->number, tr->rdta->numsp);
    const boolean yInner = !isTip(y->number, tr->rdta->numsp);

    if (xInner && !yInner) {
      /* only x carries a likelihood vector */
      while (! x->x)
        newviewGeneric(tr, x);
    } else if (!xInner && yInner) {
      /* only y carries a likelihood vector */
      while (! y->x)
        newviewGeneric(tr, y);
    } else if (xInner && yInner) {
      /* both ends are inner nodes: iterate until both are oriented */
      while ((! x->x) || (! y->x)) {
        if (! (x->x))
          newviewGeneric(tr, x);
        if (! (y->x))
          newviewGeneric(tr, y);
      }
    }
  }

  tr->likelihood = tr->endLH;

  return TRUE;
}
/*
 * Restore the substitution rates of partition `model` from `prevSubsRates`
 * and re-evaluate the tree with a full traversal.
 *
 * tr            tree whose partition is modified
 * adef          not used by this function
 * model         index of the partition; it must hold DNA data
 * numSubsRates  number of entries to copy
 * prevSubsRates rates to restore (at least numSubsRates entries)
 */
static void restoreSubsRates(tree *tr, analdef *adef, int model, int numSubsRates, double *prevSubsRates)
{
  int i;

  /* Comparison, not assignment: the previous `=` made the check a no-op and
     silently overwrote dataType in builds with assertions enabled. */
  assert(tr->partitionData[model].dataType == DNA_DATA);

  for (i = 0; i < numSubsRates; i++)
    tr->partitionData[model].substRates[i] = prevSubsRates[i];

#ifndef _LOCAL_DISCRETIZATION
  /* rebuild the rate-matrix decomposition for the restored rates */
  pllInitReversibleGTR(tr, model);
#endif

  /* TODO: need to broadcast rates here for the parallel version */

  evaluateGeneric(tr, tr->start, TRUE);
}
/*
 * Undo simpleGammaProposal(): write the saved instate->curAlpha back into
 * partition instate->model, rebuild the discrete Gamma categories (unless
 * _LOCAL_DISCRETIZATION is defined) and re-evaluate the tree with a full
 * traversal.
 */
static void resetSimpleGammaProposal(state * instate)
{
  tree *t = instate->tr;

  t->partitionData[instate->model].alpha = instate->curAlpha;

#ifndef _LOCAL_DISCRETIZATION
  pllMakeGammaCats(t->partitionData[instate->model].alpha,
                   t->partitionData[instate->model].gammaRates,
                   4);
#endif

  /* TODO: for the parallel version the gamma rates must be broadcast before
     re-evaluating; the _LOCAL_DISCRETIZATION flag should only be used for the
     parallel code. */

  evaluateGeneric(instate->tr, instate->tr->start, TRUE);
}
/*
 * Evaluate a user tree: smooth the branch lengths with smoothTree() and
 * compute the likelihood from tr->start.
 *
 * The iteration count handed to smoothTree() is the global `smoothings`
 * scaled by `smoothFactor` and truncated to int. Branch-length scalers
 * (tr->useBrLenScaler) are not supported here and trigger an assertion.
 *
 * Always returns TRUE.
 */
boolean treeEvaluate (tree *tr, double smoothFactor)
{
  boolean smoothed;
  const int maxIterations = (int)((double)smoothings * smoothFactor);

  if (tr->useBrLenScaler)
    assert(0);

  smoothed = smoothTree(tr, maxIterations);

  assert(smoothed);

  evaluateGeneric(tr, tr->start);

  return TRUE;
}
/*
 * Smooth the branches around tr->insertNode and evaluate the tree there.
 *
 * For n > 0, qsmoothLocal() is applied with depth n - 1 to the back node of
 * the insertion node and, if the insertion node is not a tip, to the back
 * nodes of the other members of its `next` ring. For n == 0 only the
 * evaluation is done.
 */
static void quickSmoothLocal(tree *tr, int n)
{
  nodeptr centre = tr->insertNode;

  if (n != 0) {
    qsmoothLocal(tr, centre->back, n - 1);

    if (!isTip(centre->number, tr->rdta->numsp)) {
      nodeptr ring;

      for (ring = centre->next; ring != centre; ring = ring->next)
        qsmoothLocal(tr, ring->back, n - 1);
    }
  }

  evaluateGeneric(tr, centre);
}
/*
 * Tentatively insert subtree `p` into the branch q -- q->back, score the
 * result and undo the insertion.
 *
 * The likelihood of the insertion is registered with addInsertion(). When
 * `veryFast` is set and the score beats tr->endLH, the insertion is also
 * recorded as the current best (insertNode, removeNode, currentZQR, endLH).
 * Before returning, the original branch q -- r is restored with its saved
 * lengths, the two insertion links of `p` are cleared and, if the global
 * Thorough is set, the branch p -- p->back is re-hooked with its saved
 * lengths.
 *
 * Returns the likelihood of the tested insertion.
 */
static double testInsertFast (tree *tr, nodeptr p, nodeptr q, insertions *ins, boolean veryFast)
{
  double savedQ[NUM_BRANCHES], savedP[NUM_BRANCHES];
  nodeptr qBack = q->back;
  double score;
  int k;

  /* save the branch lengths that the insertion will overwrite */
  for (k = 0; k < tr->numBranches; k++) {
    savedQ[k] = q->z[k];
    savedP[k] = p->z[k];
  }

  insertFast(tr, p, q, tr->numBranches);

  evaluateGeneric(tr, p->next->next);

  addInsertion(q, tr->likelihood, ins);

  if (veryFast && tr->likelihood > tr->endLH) {
    tr->insertNode = q;
    tr->removeNode = p;
    for (k = 0; k < tr->numBranches; k++)
      tr->currentZQR[k] = tr->zqr[k];
    tr->endLH = tr->likelihood;
  }

  score = tr->likelihood;

  /* undo the insertion */
  hookup(q, qBack, savedQ, tr->numBranches);

  p->next->back       = (nodeptr) NULL;
  p->next->next->back = (nodeptr) NULL;

  if (Thorough) {
    nodeptr pBack = p->back;

    hookup(p, pBack, savedP, tr->numBranches);
  }

  return score;
}
/*
 * Tentatively insert node `r` into the branch q -- q->back with local
 * branch-length smoothing, score the result and undo the insertion.
 *
 * The two new branches start from sqrt() of the original branch values,
 * clamped to [zmin, zmax]; the branch r -- r->back gets default lengths.
 * After newviewGeneric() and localSmooth() the tree is scored with
 * evaluateGenericVector() when `useVector` is set and evaluateGeneric()
 * otherwise. The original branch q -- x is then restored and the two
 * insertion links of `r` are cleared.
 *
 * Returns the value produced by the evaluation call.
 */
static double testInsertThorough(tree *tr, nodeptr r, nodeptr q, boolean useVector)
{
  double score, savedQ[NUM_BRANCHES], half[NUM_BRANCHES];
  nodeptr qBack = q->back;
  nodeptr rBack = r->back;
  int k;

  for (k = 0; k < tr->numBranches; k++) {
    savedQ[k] = q->z[k];
    half[k]   = sqrt(savedQ[k]);

    if (half[k] < zmin)
      half[k] = zmin;
    if (half[k] > zmax)
      half[k] = zmax;
  }

  hookup(r->next, q, half, tr->numBranches);
  hookup(r->next->next, qBack, half, tr->numBranches);
  hookupDefault(r, rBack, tr->numBranches);

  newviewGeneric(tr, r);

  localSmooth(tr, r, smoothings);

  score = useVector ? evaluateGenericVector(tr, r) : evaluateGeneric(tr, r);

  /* undo the insertion */
  hookup(q, qBack, savedQ, tr->numBranches);

  r->next->back       = (nodeptr) NULL;
  r->next->next->back = (nodeptr) NULL;

  return score;
}
/*
 * Branch-length proposal: update all branches via update_all_branches() and
 * refresh tr->likelihood.
 *
 * Design notes carried over from the original comments:
 *  - each branch length x is meant to be redrawn from uniform(x-w/2, x+w/2),
 *    with w the sliding-window width;
 *  - the intended prior is exponential, lambda * exp(-lambda * x), computed
 *    only for the branches that changed.
 *
 * Always returns TRUE.
 */
static boolean simpleBranchLengthProposal(state * instate)
{
  tree *t = instate->tr;

  update_all_branches(instate, FALSE);

  /* update the tr->likelihood */
  evaluateGeneric(t, t->start, FALSE);

  return TRUE;
}
/*
 * SPR-style topology proposal: prune a random inner subtree and re-insert it
 * at the branch chosen by naiveInsertionProposal().
 *
 * instate->newprior is set to 1 (flat prior for this move). The pruned node,
 * the two nodes it was attached to, the insertion branch q -- r and the
 * affected branch lengths are recorded in `instate`.
 *
 * Returns TRUE when an insertion branch was found and the subtree was
 * re-inserted and evaluated; FALSE when naiveInsertionProposal() left
 * instate->q NULL (the subtree then stays pruned).
 */
static boolean simpleNodeProposal(state * instate)
{
  /* prior is flat for these moves */
  instate->newprior = 1;

  instate->p = selectRandomInnerSubtree(instate->tr);

  /* record the neighbourhood before pruning */
  instate->nb  = instate->p->next->back;
  instate->nnb = instate->p->next->next->back;

  recordBranchInfo(instate->nb,  instate->nbz,  instate->tr->numBranches);
  recordBranchInfo(instate->nnb, instate->nnbz, instate->tr->numBranches);

  /* prune subtree p */
  if (removeNodeBIG(instate->tr, instate->p, instate->tr->numBranches) == NULL)
    assert(FALSE);

  /* pick an insertion branch; it must not lie inside the pruned subtree */
  instate->q = (nodeptr) NULL;
  naiveInsertionProposal(instate);

  if (instate->q == NULL)
    return FALSE;

  instate->r = instate->q->back;
  recordBranchInfo(instate->q, instate->qz, instate->tr->numBranches);

  if (! insertBIG(instate->tr, instate->p, instate->q, instate->tr->numBranches))
    assert(FALSE);

  /* TODO: breaks here: evaluateGenericSpecial.c:1164: evaluateIterative:
     Assertion `partitionLikelihood < 0.0' failed. */
  evaluateGeneric(instate->tr, instate->p->next->next, FALSE);

  return TRUE;
}
/*
 * Recursively try nearest-neighbour interchanges (NNIs) on the branch
 * p -- p->back and on the subtrees hanging off p.
 *
 * For an inner branch (p->back is not a tip) three arrangements are scored,
 * each after nniSmooth(tr, p, 16):
 *   0: the current topology,
 *   1: pb1 and qb1 exchanged,
 *   2: pb2 and qb1 exchanged.
 * The branch lengths of each arrangement are stored in the corresponding
 * pqz_N / pz1_N / pz2_N / qz1_N / qz2_N buffers supplied by the caller.
 *
 * shSupport == FALSE: the best-scoring arrangement is installed and
 *   *interchanges is incremented if it is not arrangement 0.
 * shSupport == TRUE:  per-site log likelihoods of all three arrangements are
 *   copied into lhVectors[0..2], the original topology (arrangement 0) is
 *   always restored, and an SH-like support value is stored in
 *   p->bInf->support.
 *
 * *innerBranches is incremented once for every inner branch visited.
 * p must not be a tip.
 */
static void doNNIs(tree *tr, nodeptr p, double *lhVectors[3], boolean shSupport, int *interchanges, int *innerBranches, double *pqz_0, double *pz1_0, double *pz2_0, double *qz1_0, double *qz2_0, double *pqz_1, double *pz1_1, double *pz2_1, double *qz1_1, double *qz2_1, double *pqz_2, double *pz1_2, double *pz2_2, double *qz1_2, double *qz2_2)
{
  /* neighbours are captured before any rearrangement, so the recursion at
     the end descends into the subtrees that hung off p on entry */
  nodeptr q = p->back, pb1 = p->next->back, pb2 = p->next->next->back;

  assert(!isTip(p->number, tr->mxtips));

  if(!isTip(q->number, tr->mxtips))
    {
      int whichNNI = 0;
      nodeptr qb1 = q->next->back, qb2 = q->next->next->back;
      double lh[3];

      *innerBranches = *innerBranches + 1;

      /* --- arrangement 0: current topology --- */
      nniSmooth(tr, p, 16);

      if(shSupport)
        {
          evaluateGenericVector(tr, p);
          memcpy(lhVectors[0], tr->perSiteLL, sizeof(double) * tr->cdta->endsite);
        }
      else
        evaluateGeneric(tr, p);

      lh[0] = tr->likelihood;

      storeBranches(tr, p, pqz_0, pz1_0, pz2_0, qz1_0, qz2_0);

      /* --- arrangement 1: exchange pb1 and qb1, starting from the branch
         lengths of arrangement 0 --- */
      hookup(p, q, pqz_0, tr->numBranches);
      hookup(p->next, qb1, qz1_0, tr->numBranches);
      hookup(p->next->next, pb2, pz2_0, tr->numBranches);
      hookup(q->next, pb1, pz1_0, tr->numBranches);
      hookup(q->next->next, qb2, qz2_0, tr->numBranches);

      newviewGeneric(tr, p);
      newviewGeneric(tr, p->back);

      nniSmooth(tr, p, 16);

      if(shSupport)
        {
          evaluateGenericVector(tr, p);
          memcpy(lhVectors[1], tr->perSiteLL, sizeof(double) * tr->cdta->endsite);
        }
      else
        evaluateGeneric(tr, p);

      lh[1] = tr->likelihood;

      storeBranches(tr, p, pqz_1, pz1_1, pz2_1, qz1_1, qz2_1);

      if(lh[1] > lh[0])
        whichNNI = 1;

      /* --- arrangement 2: exchange pb2 and qb1, again starting from the
         branch lengths of arrangement 0 --- */
      hookup(p, q, pqz_0, tr->numBranches);
      hookup(p->next, qb1, qz1_0, tr->numBranches);
      hookup(p->next->next, pb1, pz1_0, tr->numBranches);
      hookup(q->next, pb2, pz2_0, tr->numBranches);
      hookup(q->next->next, qb2, qz2_0, tr->numBranches);

      newviewGeneric(tr, p);
      newviewGeneric(tr, p->back);

      nniSmooth(tr, p, 16);

      if(shSupport)
        {
          evaluateGenericVector(tr, p);
          memcpy(lhVectors[2], tr->perSiteLL, sizeof(double) * tr->cdta->endsite);
        }
      else
        evaluateGeneric(tr, p);

      lh[2] = tr->likelihood;

      storeBranches(tr, p, pqz_2, pz1_2, pz2_2, qz1_2, qz2_2);

      if(lh[2] > lh[0] && lh[2] > lh[1])
        whichNNI = 2;

      /* when computing supports the topology must not change */
      if(shSupport)
        whichNNI = 0;

      /* install the selected arrangement with the branch lengths that were
         stored for it */
      switch(whichNNI)
        {
        case 0:
          hookup(p, q, pqz_0, tr->numBranches);
          hookup(p->next, pb1, pz1_0, tr->numBranches);
          hookup(p->next->next, pb2, pz2_0, tr->numBranches);
          hookup(q->next, qb1, qz1_0, tr->numBranches);
          hookup(q->next->next, qb2, qz2_0, tr->numBranches);
          break;
        case 1:
          hookup(p, q, pqz_1, tr->numBranches);
          hookup(p->next, qb1, pz1_1, tr->numBranches);
          hookup(p->next->next, pb2, pz2_1, tr->numBranches);
          hookup(q->next, pb1, qz1_1, tr->numBranches);
          hookup(q->next->next, qb2, qz2_1, tr->numBranches);
          break;
        case 2:
          hookup(p, q, pqz_2, tr->numBranches);
          hookup(p->next, qb1, pz1_2, tr->numBranches);
          hookup(p->next->next, pb1, pz2_2, tr->numBranches);
          hookup(q->next, pb2, qz1_2, tr->numBranches);
          hookup(q->next->next, qb2, qz2_2, tr->numBranches);
          break;
        default:
          assert(0);
        }

      newviewGeneric(tr, p);
      newviewGeneric(tr, q);

      if(whichNNI > 0)
        *interchanges = *interchanges + 1;

      if(shSupport)
        p->bInf->support = SHSupport(tr->cdta->endsite, 1000, tr->resample, lh, lhVectors);
    }

  /* descend into the two subtrees that hung off p on entry */
  if(!isTip(pb1->number, tr->mxtips))
    doNNIs(tr, pb1, lhVectors, shSupport, interchanges, innerBranches, pqz_0, pz1_0, pz2_0, qz1_0, qz2_0, pqz_1, pz1_1, pz2_1, qz1_1, qz2_1, pqz_2, pz1_2, pz2_2, qz1_2, qz2_2);

  if(!isTip(pb2->number, tr->mxtips))
    doNNIs(tr, pb2, lhVectors, shSupport, interchanges, innerBranches, pqz_0, pz1_0, pz2_0, qz1_0, qz2_0, pqz_1, pz1_1, pz2_1, qz1_1, qz2_1, pqz_2, pz1_2, pz2_2, qz1_2, qz2_2);

  return;
}
/*
 * NNI-optimise a user tree, compute SH-like branch supports for it and write
 * both the optimised tree and the support-annotated tree to the working
 * directory. This function terminates the program with exit(0).
 *
 * Requires adef->restart to be set. Model parameters are either read from a
 * binary model file (adef->useBinaryModelFile) or optimised with modOpt().
 * NNI rounds are repeated until the likelihood changes by at most 0.01
 * between rounds; a final encapsulateNNIs() pass with supports enabled then
 * fills tr->bInf.
 *
 * Output files: <workdir>RAxML_fastTree.<run_id> and
 *               <workdir>RAxML_fastTreeSH_Support.<run_id>
 * The file names are assembled with snprintf() so that an over-long
 * workdir/run_id cannot overflow the name buffers; truncation is caught by
 * an assertion.
 */
void shSupports(tree *tr, analdef *adef, rawdata *rdta, cruncheddata *cdta)
{
  double
    diff,
    *lhVectors[3];

  char
    bestTreeFileName[1024],
    shSupportFileName[1024];

  FILE
    *f;

  int
    nameLength,
    interchanges = 0,
    counter = 0;

  assert(adef->restart);

  tr->resample = permutationSH(tr, 1000, 12345);

  /* one per-site likelihood buffer for each of the three NNI arrangements */
  lhVectors[0] = (double *)rax_malloc(sizeof(double) * tr->cdta->endsite);
  lhVectors[1] = (double *)rax_malloc(sizeof(double) * tr->cdta->endsite);
  lhVectors[2] = (double *)rax_malloc(sizeof(double) * tr->cdta->endsite);

  tr->bInf = (branchInfo*)rax_malloc(sizeof(branchInfo) * (tr->mxtips - 3));

  initModel(tr, rdta, cdta, adef);

  getStartingTree(tr, adef);

  if(adef->useBinaryModelFile)
    {
      readBinaryModel(tr);
      evaluateGenericInitrav(tr, tr->start);
      treeEvaluate(tr, 2);
    }
  else
    modOpt(tr, adef, FALSE, 10.0);

  printBothOpen("Time after model optimization: %f\n", gettime() - masterTime);

  printBothOpen("Initial Likelihood %f\n\n", tr->likelihood);

  /* NNI rounds without supports until the likelihood has converged */
  do
    {
      double
        lh1,
        lh2;

      lh1 = tr->likelihood;

      interchanges = encapsulateNNIs(tr, lhVectors, FALSE);

      evaluateGeneric(tr, tr->start);

      lh2 = tr->likelihood;

      diff = ABS(lh1 - lh2);

      printBothOpen("NNI interchanges %d Likelihood %f\n", interchanges, tr->likelihood);
    }
  while(diff > 0.01);

  printBothOpen("\nFinal Likelihood of NNI-optimized tree: %f\n\n", tr->likelihood);

  setupBranchInfo(tr->start->back, tr, &counter);
  assert(counter == tr->mxtips - 3);

  /* final pass: topology stays fixed, supports are computed */
  interchanges = encapsulateNNIs(tr, lhVectors, TRUE);

  nameLength = snprintf(bestTreeFileName, sizeof(bestTreeFileName), "%s%s%s",
                        workdir, "RAxML_fastTree.", run_id);
  assert(nameLength >= 0 && (size_t)nameLength < sizeof(bestTreeFileName));

  Tree2String(tr->tree_string, tr, tr->start->back, FALSE, TRUE, FALSE, FALSE, FALSE, adef, SUMMARIZE_LH, FALSE, FALSE);

  f = myfopen(bestTreeFileName, "wb");
  fprintf(f, "%s", tr->tree_string);
  fclose(f);

  nameLength = snprintf(shSupportFileName, sizeof(shSupportFileName), "%s%s%s",
                        workdir, "RAxML_fastTreeSH_Support.", run_id);
  assert(nameLength >= 0 && (size_t)nameLength < sizeof(shSupportFileName));

  Tree2String(tr->tree_string, tr, tr->start->back, TRUE, TRUE, FALSE, FALSE, FALSE, adef, SUMMARIZE_LH, FALSE, TRUE);

  f = myfopen(shSupportFileName, "wb");
  fprintf(f, "%s", tr->tree_string);
  fclose(f);

  printBothOpen("RAxML NNI-optimized tree written to file: %s\n", bestTreeFileName);
  printBothOpen("Same tree with SH-like supports written to file: %s\n", shSupportFileName);

  printBothOpen("Total execution time: %f\n", gettime() - masterTime);

  exit(0);
}
/*
 * Fast tree search: initialise the model, obtain a starting tree, roughly
 * optimise model parameters, then alternate cycles of linear SPRs and NNIs
 * until the likelihood changes by at most 0.5 log-likelihood units per
 * cycle. The resulting tree is written to <workdir>RAxML_fastTree.<run_id>
 * and the model parameters are saved with writeBinaryModel().
 *
 * Sets the global Thorough to 1. Computes a single tree only; bootstrapping
 * or multiple inferences are not supported by this routine.
 *
 * The output file name is assembled with snprintf() so that an over-long
 * workdir/run_id cannot overflow the name buffer; truncation is caught by an
 * assertion.
 */
void fastSearch(tree *tr, analdef *adef, rawdata *rdta, cruncheddata *cdta)
{
  double
    likelihood,
    startLikelihood,
    *lhVectors[3];

  char
    bestTreeFileName[1024];

  FILE
    *f;

  int
    nameLength,
    model;

  /* no per-site likelihood buffers are needed without SH supports */
  lhVectors[0] = (double *)NULL;
  lhVectors[1] = (double *)NULL;
  lhVectors[2] = (double *)NULL;

  /* initialize model parameters with standard starting values */
  initModel(tr, rdta, cdta, adef);

  printBothOpen("Time after init : %f\n", gettime() - masterTime);

  /* compute starting tree, either by reading in a tree specified via -t
     or by building one */
  getStartingTree(tr, adef);

  printBothOpen("Time after init and starting tree: %f\n", gettime() - masterTime);

  /* rough model parameter optimization; the log likelihood epsilon should
     actually be determined based on the initial tree score and not be
     hard-coded */
  if(adef->useBinaryModelFile)
    {
      readBinaryModel(tr);
      evaluateGenericInitrav(tr, tr->start);
      treeEvaluate(tr, 2);
    }
  else
    modOpt(tr, adef, FALSE, 10.0);

  printBothOpen("Time after init, starting tree, mod opt: %f\n", gettime() - masterTime);

  /* print out the number of rate categories used for the CAT model; one
     should use fewer than the default, e.g. -c 16 works quite well */
  for(model = 0; model < tr->NumberOfModels; model++)
    printBothOpen("Partion %d number of Cats: %d\n", model, tr->partitionData[model].numberOfCategories);

  /* thorough insertions: real Newton-Raphson based branch-length
     optimization at the three branches adjacent to every insertion point */
  Thorough = 1;

  /* loop over SPR cycles until the likelihood difference before and after
     the cycle is <= 0.5 log likelihood units. Rather than being hard-coded
     this should also be determined from the actual likelihood of the tree */
  do
    {
      startLikelihood = tr->likelihood;

      /* conduct a cycle of linear SPRs */
      likelihood = linearSPRs(tr, 20, adef->veryFast);

      evaluateGeneric(tr, tr->start);

      /* the NNIs also optimize br-lens of the resulting topology a bit */
      encapsulateNNIs(tr, lhVectors, FALSE);

      printBothOpen("LH after SPRs %f, after NNI %f\n", likelihood, tr->likelihood);
    }
  while(ABS(tr->likelihood - startLikelihood) > 0.5);

  /* write the resulting tree to the RAxML_fastTree. file */
  nameLength = snprintf(bestTreeFileName, sizeof(bestTreeFileName), "%s%s%s",
                        workdir, "RAxML_fastTree.", run_id);
  assert(nameLength >= 0 && (size_t)nameLength < sizeof(bestTreeFileName));

  Tree2String(tr->tree_string, tr, tr->start->back, FALSE, TRUE, FALSE, FALSE, FALSE, adef, SUMMARIZE_LH, FALSE, FALSE);

  f = myfopen(bestTreeFileName, "wb");
  fprintf(f, "%s", tr->tree_string);
  fclose(f);

  printBothOpen("RAxML fast tree written to file: %s\n", bestTreeFileName);

  writeBinaryModel(tr);

  printBothOpen("Total execution time: %f\n", gettime() - masterTime);

  printBothOpen("Good bye ... \n");
}
/*
 * Compute the per-site log likelihoods of the current tree and store them in
 * `logLikelihoods`.
 *
 * A standard full-traversal evaluation is done first. It provides
 * tr->likelihood for the consistency check and leaves a full traversal in
 * the traversal descriptor that the per-site computations rely on.
 *
 * Parallel builds (_USE_PTHREADS / _FINE_GRAIN_MPI): the per-site values are
 * computed in a parallel region, gathered into tr->lhs and copied out
 * (tr->originalCrunchedLength entries).
 * Sequential build: every site of every partition is evaluated with
 * evaluatePartialGeneric() and written to logLikelihoods[site]; the sum of
 * all sites is asserted to match tr->likelihood within 1e-5.
 */
void perSiteLogLikelihoods(tree *tr, double *logLikelihoods)
{
  evaluateGeneric (tr, tr->start, TRUE);

#if (defined( _USE_PTHREADS ) || defined(_FINE_GRAIN_MPI))
  /* barrier that starts the parallel region; afterwards the per-site log
     likelihoods of all threads are available in tr->lhs of the master
     (corresponds to a gather operation in MPI) */
  masterBarrier(THREAD_PER_SITE_LIKELIHOODS, tr);

  memcpy(logLikelihoods, tr->lhs, sizeof(double) * tr->originalCrunchedLength);
#else
  {
    double
      accumulatedPerSiteLikelihood = 0.0;

    size_t
      localCount,
      i,
      model,
      lower,
      upper;

    /* sequential case: loop over all partitions and all of their sites */
    for(model = 0; model < tr->NumberOfModels; model++)
      {
        lower = tr->partitionData[model].lower;
        upper = tr->partitionData[model].upper;

        for(i = lower, localCount = 0; i < upper; i++, localCount++)
          {
            /* initialised so that an unsupported rate model cannot store an
               indeterminate value when assertions are compiled out */
            double
              l = 0.0;

            /* Under PSR (CAT) the per-site rate has to be handed to
               evaluatePartialGeneric(); under GAMMA the rate argument is
               ignored and simply set to 1.0 */
            switch(tr->rateHetModel)
              {
              case CAT:
                l = evaluatePartialGeneric (tr, i, tr->partitionData[model].perSiteRates[tr->partitionData[model].rateCategory[localCount]], model);
                break;
              case GAMMA:
                l = evaluatePartialGeneric (tr, i, 1.0, model);
                break;
              default:
                assert(0);
              }

            /* store the site value and add it to the running total */
            logLikelihoods[i] = l;
            accumulatedPerSiteLikelihood += l;
          }
      }

    /* error checking: the standard and the per-site implementations differ
       considerably, so small numerical deviations are expected and a coarse
       epsilon is used */
    assert(ABS(tr->likelihood - accumulatedPerSiteLikelihood) < 0.00001);
  }
#endif
}
/*
 * Tentatively insert subtree `p` into the branch q -- q->back, score the
 * insertion, update the best-insertion bookkeeping and undo the insertion.
 *
 * With topological constraints active (tr->grouped) the insertion is only
 * tried when the constraint group of `p` is unresolved (-9) or equals the
 * group of `q` or of q->back; otherwise the function returns TRUE without
 * doing anything.
 *
 * Bookkeeping: tr->bestOfNode together with the current* branch-length
 * arrays, and tr->endLH / insertNode / removeNode, are updated when the new
 * likelihood beats them. With tr->doCutoff set, a likelihood below the value
 * tr->endLH had on entry is accumulated into lhAVG / lhDEC.
 *
 * Returns FALSE when insertBIG() fails or when the likelihood drop reaches
 * tr->lhCutoff (cutoff mode only); TRUE otherwise.
 */
boolean testInsertBIG (tree *tr, nodeptr p, nodeptr q)
{
  double savedQ[NUM_BRANCHES], savedP[NUM_BRANCHES];
  nodeptr qBack = q->back;
  boolean allowed = TRUE;
  const double startLH = tr->endLH;
  int k;

  for (k = 0; k < tr->numBranches; k++) {
    savedQ[k] = q->z[k];
    savedP[k] = p->z[k];
  }

  if (tr->grouped) {
    int rGroup = tr->constraintVector[qBack->number];
    int qGroup = tr->constraintVector[q->number];
    int pGroup = tr->constraintVector[p->number];

    /* -9 marks a group that still has to be resolved via checker() */
    if (pGroup == -9)
      pGroup = checker(tr, p->back);

    if (pGroup != -9) {
      if (qGroup == -9)
        qGroup = checker(tr, q);
      if (rGroup == -9)
        rGroup = checker(tr, qBack);

      allowed = (pGroup == rGroup || pGroup == qGroup);
    } else {
      allowed = TRUE;
    }
  }

  if (!allowed)
    return TRUE;

  if (! insertBIG(tr, p, q, tr->numBranches))
    return FALSE;

  evaluateGeneric(tr, p->next->next);

  if (tr->likelihood > tr->bestOfNode) {
    tr->bestOfNode = tr->likelihood;
    tr->insertNode = q;
    tr->removeNode = p;
    for (k = 0; k < tr->numBranches; k++) {
      tr->currentZQR[k] = tr->zqr[k];
      tr->currentLZR[k] = tr->lzr[k];
      tr->currentLZQ[k] = tr->lzq[k];
      tr->currentLZS[k] = tr->lzs[k];
    }
  }

  if (tr->likelihood > tr->endLH) {
    tr->insertNode = q;
    tr->removeNode = p;
    for (k = 0; k < tr->numBranches; k++)
      tr->currentZQR[k] = tr->zqr[k];
    tr->endLH = tr->likelihood;
  }

  /* undo the insertion */
  hookup(q, qBack, savedQ, tr->numBranches);

  p->next->back       = (nodeptr) NULL;
  p->next->next->back = (nodeptr) NULL;

  if (Thorough) {
    nodeptr pBack = p->back;

    hookup(p, pBack, savedP, tr->numBranches);
  }

  if ((tr->doCutoff) && (tr->likelihood < startLH)) {
    tr->lhAVG += (startLH - tr->likelihood);
    tr->lhDEC++;

    if ((startLH - tr->likelihood) >= tr->lhCutoff)
      return FALSE;
  }

  return TRUE;
}
/*
 * Evaluate every tree in `bootStrapFileName` and write the trees with
 * optimised branch lengths to resultFileName. This function terminates the
 * program: exit(0) on success, exit(EXIT_FAILURE) if a file cannot be opened
 * or memory cannot be allocated.
 *
 * The number of trees is determined by counting ';' characters in the input
 * file. Model parameters are optimised on the first tree. After all trees
 * have been scored, the tree recalled from the best-tree list is
 * re-optimised and its likelihood is reported next to the best likelihood
 * seen. Progress is printed to stdout and appended to infoFileName.
 */
static void computeAllLHs(tree *tr, analdef *adef, char *bootStrapFileName)
{
  int numberOfTrees = 0, i;
  int ch; /* int, not char: getc() returns int so that EOF stays distinguishable */
  double bestLH = unlikely;
  bestlist *bestT;
  FILE *infoFile, *result;

  infoFile = fopen(infoFileName, "a");
  if(infoFile == NULL)
    {
      fprintf(stderr, "Could not open file %s\n", infoFileName);
      exit(EXIT_FAILURE);
    }

  result = fopen(resultFileName, "w");
  if(result == NULL)
    {
      fprintf(stderr, "Could not open file %s\n", resultFileName);
      exit(EXIT_FAILURE);
    }

  bestT = (bestlist *) malloc(sizeof(bestlist));
  if(bestT == NULL)
    {
      fprintf(stderr, "Out of memory in computeAllLHs\n");
      exit(EXIT_FAILURE);
    }
  bestT->ninit = 0;
  initBestTree(bestT, 1, tr->mxtips);

  allocNodex(tr, adef);

  INFILE = fopen(bootStrapFileName, "r");
  if(INFILE == NULL)
    {
      fprintf(stderr, "Could not open file %s\n", bootStrapFileName);
      exit(EXIT_FAILURE);
    }

  /* every tree in the file ends with ';' */
  while((ch = getc(INFILE)) != EOF)
    {
      if(ch == ';')
        numberOfTrees++;
    }
  rewind(INFILE);

  printf("\n\nFound %d trees in File %s\n\n", numberOfTrees, bootStrapFileName);
  fprintf(infoFile, "\n\nBB Found %d trees in File %s\n\n", numberOfTrees, bootStrapFileName);

  for(i = 0; i < numberOfTrees; i++)
    {
      treeReadLen(INFILE, tr, adef);

      if(i == 0)
        {
          /* model parameters are optimised once, on the first tree */
          modOpt(tr, adef);
          printf("Model optimization, first Tree: %f\n", tr->likelihood);
          fprintf(infoFile, "Model optimization, first Tree: %f\n", tr->likelihood);
          bestLH = tr->likelihood;
          resetBranches(tr);
        }

      treeEvaluate(tr, 2);
      Tree2String(tr->tree_string, tr, tr->start->back, TRUE, TRUE, FALSE, FALSE,
                  TRUE, adef, SUMMARIZE_LH);

      fprintf(result, "%s", tr->tree_string);

      saveBestTree(bestT, tr);

      if(tr->likelihood > bestLH)
        bestLH = tr->likelihood;

      printf("Tree %d Likelihood %f\n", i, tr->likelihood);
      fprintf(infoFile, "Tree %d Likelihood %f\n", i, tr->likelihood);
    }

  recallBestTree(bestT, 1, tr);
  evaluateGeneric(tr, tr->start);
  printf("Model optimization, %f <-> %f\n", bestLH, tr->likelihood);
  fprintf(infoFile, "Model optimization, %f <-> %f\n", bestLH, tr->likelihood);

  modOpt(tr, adef);
  treeEvaluate(tr, 2);
  printf("Model optimization, %f <-> %f\n", bestLH, tr->likelihood);
  fprintf(infoFile, "Model optimization, %f <-> %f\n", bestLH, tr->likelihood);

  printf("\nAll evaluated trees with branch lengths written to File: %s\n", resultFileName);
  fprintf(infoFile, "\nAll evaluated trees with branch lengths written to File: %s\n", resultFileName);

  fclose(INFILE);
  fclose(infoFile);
  fclose(result);

  exit(0);
}
/*
 * Compute marginal ancestral states for the rooted tree defined by
 * tr->leftRootNode / tr->rightRootNode and write three files into the
 * working directory:
 *   RAxML_marginalAncestralProbabilities.<run_id>
 *   RAxML_marginalAncestralStates.<run_id>
 *   RAxML_nodeLabelledRootedTree.<run_id>
 *
 * tr                   tree; leftRootNode must be the back node of
 *                      rightRootNode
 * referenceLikelihood  likelihood the tree is expected to have; a deviation
 *                      of more than 0.5 after the computation is reported
 *                      and triggers an assertion
 * adef                 not used by this function
 *
 * The file names are assembled with snprintf() so that an over-long
 * workdir/run_id cannot overflow the name buffers; truncation is caught by
 * an assertion.
 */
void computeAncestralStates(tree *tr, double referenceLikelihood, analdef *adef)
{
  int
    nameLength,
    counter = 0;

  char
    treeFileName[2048],
    ancestralProbsFileName[2048],
    ancestralStatesFileName[2048];

  FILE
    *treeFile,
    *probsFile,
    *statesFile;

#ifdef _USE_PTHREADS
  tr->ancestralStates = (double*)malloc(getContiguousVectorLength(tr) * sizeof(double));
#endif

  nameLength = snprintf(ancestralProbsFileName, sizeof(ancestralProbsFileName), "%s%s%s",
                        workdir, "RAxML_marginalAncestralProbabilities.", run_id);
  assert(nameLength >= 0 && (size_t)nameLength < sizeof(ancestralProbsFileName));

  nameLength = snprintf(ancestralStatesFileName, sizeof(ancestralStatesFileName), "%s%s%s",
                        workdir, "RAxML_marginalAncestralStates.", run_id);
  assert(nameLength >= 0 && (size_t)nameLength < sizeof(ancestralStatesFileName));

  nameLength = snprintf(treeFileName, sizeof(treeFileName), "%s%s%s",
                        workdir, "RAxML_nodeLabelledRootedTree.", run_id);
  assert(nameLength >= 0 && (size_t)nameLength < sizeof(treeFileName));

  probsFile  = myfopen(ancestralProbsFileName, "w");
  statesFile = myfopen(ancestralStatesFileName, "w");
  treeFile   = myfopen(treeFileName, "w");

  assert(tr->leftRootNode == tr->rightRootNode->back);

  /* both sides of the root branch, then the root itself (last flag TRUE) */
  computeAncestralRec(tr, tr->leftRootNode, &counter, probsFile, statesFile, FALSE);
  computeAncestralRec(tr, tr->rightRootNode, &counter, probsFile, statesFile, FALSE);
  computeAncestralRec(tr, tr->rightRootNode, &counter, probsFile, statesFile, TRUE);

  /* sanity check: the likelihood must still match the reference value */
  evaluateGeneric(tr, tr->rightRootNode);

  if(fabs(tr->likelihood - referenceLikelihood) > 0.5)
    {
      printf("Something suspiciuous is going on with the marginal ancestral probability computations\n");
      assert(0);
    }

  assert(counter == tr->mxtips - 1);

  ancestralTree(tr->tree_string, tr);

  fprintf(treeFile, "%s\n", tr->tree_string);

  fclose(probsFile);
  fclose(statesFile);
  fclose(treeFile);

  printBothOpen("Marginal Ancestral Probabilities written to file:\n%s\n\n", ancestralProbsFileName);
  printBothOpen("Ancestral Sequences based on Marginal Ancestral Probabilities written to file:\n%s\n\n", ancestralStatesFileName);
  printBothOpen("Node-laballed ROOTED tree written to file:\n%s\n", treeFileName);
}
/*
 * Minimal Metropolis-Hastings chain over tree topology, branch lengths,
 * substitution rates and the Gamma shape parameter.
 *
 * Runs a fixed number of moves (10000) with a fixed random seed (440). Each
 * iteration asks proposal() for a move, accepts it with probability
 *   min(1, hastings * exp(newprior - curprior) * exp(likelihood - startLH))
 * and otherwise undoes it with resetState(). tr->startLH always holds the
 * likelihood of the current (last accepted) state. Every 50 iterations the
 * state is written with printStateFile(). Acceptance statistics and timings
 * are reported at the end; a ratio is printed as nan when the corresponding
 * proposal type was never drawn.
 *
 * Changes compared with the earlier version of this function:
 *  - the "Starting with ..." message no longer passes an uninitialised int
 *    to a %f conversion;
 *  - tr->startLH is refreshed after modOpt(), which changes the likelihood
 *    of the current state; before, the first acceptance test and the
 *    post-rejection consistency check used the pre-optimisation value;
 *  - unused locals were removed.
 */
void mcmc(tree *tr, analdef *adef)
{
  tr->startLH = tr->likelihood;
  printBothOpen("start minimalistic search with LH %f\n", tr->likelihood);
  printBothOpen("tr LH %f, startLH %f\n", tr->likelihood, tr->startLH);

  int j;
  int maxradius = 30;
  int accepted_spr = 0, accepted_nni = 0, accepted_bl = 0, accepted_model = 0, accepted_gamma = 0, inserts = 0;
  int rejected_spr = 0, rejected_nni = 0, rejected_bl = 0, rejected_model = 0, rejected_gamma = 0;
  int num_moves = 10000;
  prop which_proposal;
  double testr;
  double acceptance;

  srand (440);

  double proposalTime = 0.0, blTime = 0.0, printTime = 0.0;
  double t_start = gettime();
  double t;

  /* proposal tuning parameters */
  double bl_prior_exp_lambda = 0.1;
  double bl_sliding_window_w = 0.005;
  double gm_sliding_window_w = 0.75;
  double rt_sliding_window_w = 0.5;

  /* allocate the chain state */
  state *curstate = state_init(tr, adef, maxradius, bl_sliding_window_w, rt_sliding_window_w, gm_sliding_window_w, bl_prior_exp_lambda);

  printStateFileHeader(curstate);
  set_start_bl(curstate);
  printf("start bl_prior: %f\n",curstate->bl_prior);
  set_start_prior(curstate);
  curstate->hastings = 1; /* needs to be set by the proposal when necessary */

  /* set the starting LH with a full traversal */
  evaluateGeneric(tr, tr->start, TRUE);
  tr->startLH = tr->likelihood;
  printBothOpen("Starting with tr LH %f, startLH %f\n", tr->likelihood, tr->startLH);

  /* set reasonable model parameters */
  evaluateGeneric(curstate->tr, curstate->tr->start, FALSE); /* just for validation */
  printBothOpen("tr LH before modOpt %f\n",curstate->tr->likelihood);
  printSubsRates(curstate->tr, curstate->model, curstate->numSubsRates);

  /* optimize the model with Brent's method for reasonable starting points;
     not done by proposal, just using the standard machinery */
  modOpt(curstate->tr, curstate->adef, 5.0);
  evaluateGeneric(curstate->tr, curstate->tr->start, FALSE); /* just for validation */
  printBothOpen("tr LH after modOpt %f\n",curstate->tr->likelihood);
  printSubsRates(curstate->tr, curstate->model, curstate->numSubsRates);
  recordSubsRates(curstate->tr, curstate->model, curstate->numSubsRates, curstate->curSubsRates);

  /* the optimised model defines the state the chain starts from */
  curstate->tr->startLH = curstate->tr->likelihood;

  int first = 1;

  /* beginning of the MCMC chain */
  for(j=0; j<num_moves; j++)
    {
      t = gettime();

      which_proposal = proposal(curstate);

      if (first == 1)
        {
          /* no previous prior exists before the first proposal */
          first = 0;
          curstate->curprior = curstate->newprior;
        }

      assert(which_proposal == SPR || which_proposal == stNNI ||
             which_proposal == UPDATE_ALL_BL ||
             which_proposal == UPDATE_MODEL || which_proposal == UPDATE_GAMMA);
      proposalTime += gettime() - t;

      /* decide upon acceptance */
      testr = (double)rand()/(double)RAND_MAX;
      acceptance = fmin(1,(curstate->hastings) *
                        (exp(curstate->newprior-curstate->curprior)) * (exp(curstate->tr->likelihood-curstate->tr->startLH)));

      if(testr < acceptance)
        {
          switch(which_proposal)
            {
            case SPR:
              accepted_spr++;
              break;
            case stNNI:
              printBothOpen("NNI new topology , iter %d tr LH %f, startLH %f\n", j, tr->likelihood, tr->startLH);
              accepted_nni++;
              break;
            case UPDATE_ALL_BL:
              accepted_bl++;
              break;
            case UPDATE_MODEL:
              accepted_model++;
              break;
            case UPDATE_GAMMA:
              accepted_gamma++;
              break;
            default:
              assert(0);
            }

          /* the proposed state becomes the current state */
          curstate->tr->startLH = curstate->tr->likelihood;
          curstate->curprior = curstate->newprior;
        }
      else
        {
          resetState(which_proposal,curstate);

          switch(which_proposal)
            {
            case SPR:
              rejected_spr++;
              break;
            case stNNI:
              rejected_nni++;
              break;
            case UPDATE_ALL_BL:
              rejected_bl++;
              break;
            case UPDATE_MODEL:
              rejected_model++;
              break;
            case UPDATE_GAMMA:
              rejected_gamma++;
              break;
            default:
              assert(0);
            }

          /* validation: after the reset the likelihood must be back at the
             value of the current state */
          evaluateGeneric(tr, tr->start, FALSE);
          if(fabs(curstate->tr->startLH - tr->likelihood) > 1.0E-10)
            {
              printBothOpen("WARNING: LH diff %.10f\n", curstate->tr->startLH - tr->likelihood);
            }
          assert(fabs(curstate->tr->startLH - tr->likelihood) < 1.0E-10);
        }

      inserts++;

      /* periodic status output */
      if (j % 50 == 0)
        {
          t = gettime();
          printBothOpen("sampled at iter %d, tr LH %f, startLH %f, prior %f, incr %f\n",j, tr->likelihood, tr->startLH, curstate->curprior, tr->likelihood - tr->startLH);
          /* TODO: print some parameters to a file */
          printStateFile(j,curstate);
          printTime += gettime() - t;
        }
    }

  t = gettime();
  treeEvaluate(tr, 1);
  blTime += gettime() - t;

  printBothOpen("accepted SPR %d, accepted stNNI %d, accepted BL %d, accepted model %d, accepted gamma %d, num moves tried %d, SPRs with max radius %d\n",
                accepted_spr, accepted_nni, accepted_bl, accepted_model, accepted_gamma, num_moves, maxradius);
  printBothOpen("rejected SPR %d, rejected stNNI %d, rejected BL %d, rejected model %d, rejected gamma %d\n",
                rejected_spr, rejected_nni, rejected_bl, rejected_model, rejected_gamma);
  printBothOpen("ratio SPR %f, ratio stNNI %f, ratio BL %f, ratio model %f, ratio gamma %f\n",
                accepted_spr/(double)(rejected_spr+accepted_spr), accepted_nni/(double)(rejected_nni+accepted_nni), accepted_bl/(double)(rejected_bl+accepted_bl), accepted_model/(double)(rejected_model+accepted_model), accepted_gamma/(double)(rejected_gamma+accepted_gamma));
  printBothOpen("total %f, BL %f, printing %f, proposal %f\n", gettime()- t_start, blTime, printTime, proposalTime);

  assert(inserts == num_moves);
  state_free(curstate);
}
/*
 * Propose a stochastic NNI move around a randomly selected inner branch.
 *
 * A candidate node is drawn with selectRandomInnerSubtree() until neither it
 * nor its back node is a tip; the function gives up and returns FALSE once
 * more than 500 candidates have been drawn.  The current branch information
 * is recorded with recordNNIBranchInfo(), then a uniform random draw picks
 * one of three equally likely moves, stored in s->whichNNI:
 *   1 - swap the first subtree of p with the first subtree of q,
 *   2 - swap the second subtree of p with the first subtree of q,
 *   0 - keep the topology and only re-hook the five branches.
 * Finally both ends of the central branch are refreshed with
 * newviewGeneric() and the likelihood is recomputed with evaluateGeneric().
 *
 * Returns TRUE when a move was applied, FALSE when no suitable branch was
 * found.
 */
static boolean stNNIproposal(state *s)
{
  int tries = 0;
  int move;
  double draw;
  boolean changeBL = TRUE;   /* FALSE would re-hook with the existing lengths */
  nodeptr u, v, uSub1, uSub2, vSub1, vSub2;

  //s->newprior = 1;
  s->bl_prior = 0;

  /* Draw candidates until both ends of the branch are inner nodes. */
  for (;;)
    {
      s->p = selectRandomInnerSubtree(s->tr);
      /* TODOFER do this ad hoc for NNI requirements */
      if (++tries > 500)
        return FALSE;
      if (!isTip(s->p->number, s->tr->mxtips) && !isTip(s->p->back->number, s->tr->mxtips))
        break;
    }
  assert(!isTip(s->p->number, s->tr->mxtips));

  u     = s->p;
  v     = s->p->back;
  uSub1 = s->p->next->back;
  uSub2 = s->p->next->next->back;
  assert(!isTip(v->number, s->tr->mxtips));
  vSub1 = v->next->back;
  vSub2 = v->next->next->back;

  recordNNIBranchInfo(u, s->tr->numBranches);

  /* Pick one of the three moves with probability 1/3 each. */
  draw = (double)rand()/(double)RAND_MAX;
  if (draw < 1.0 / 3.0)
    move = 1;
  else if (draw < 2.0 / 3.0)
    move = 2;
  else
    move = 0;
  s->whichNNI = move;

  switch (move)
    {
    case 1:
      if (changeBL)
        {
          hookupBL(u, v, u, s);
          hookupBL(u->next, vSub1, v->next, s);
          hookupBL(u->next->next, uSub2, u->next->next, s);
          hookupBL(v->next, uSub1, u->next, s);
          hookupBL(v->next->next, vSub2, v->next->next, s);
        }
      else
        {
          hookup(u, v, u->z, s->tr->numBranches);
          hookup(u->next, vSub1, v->next->z, s->tr->numBranches);
          hookup(u->next->next, uSub2, u->next->next->z, s->tr->numBranches);
          hookup(v->next, uSub1, u->next->z, s->tr->numBranches);
          hookup(v->next->next, vSub2, v->next->next->z, s->tr->numBranches);
        }
      break;

    case 2:
      if (changeBL)
        {
          hookupBL(u, v, u, s);
          hookupBL(u->next, uSub1, u->next, s);
          hookupBL(u->next->next, vSub1, v->next, s);
          hookupBL(v->next, uSub2, u->next->next, s);
          hookupBL(v->next->next, vSub2, v->next->next, s);
        }
      else
        {
          hookup(u, v, u->z, s->tr->numBranches);
          hookup(u->next, uSub1, u->next->z, s->tr->numBranches);
          hookup(u->next->next, vSub1, v->next->z, s->tr->numBranches);
          hookup(v->next, uSub2, u->next->next->z, s->tr->numBranches);
          hookup(v->next->next, vSub2, v->next->next->z, s->tr->numBranches);
        }
      break;

    default:
      /* change only the branch lengths; done like this for symmetry */
      if (changeBL)
        {
          hookupBL(u, v, u, s);
          hookupBL(u->next, uSub1, u->next, s);
          hookupBL(u->next->next, uSub2, u->next->next, s);
          hookupBL(v->next, vSub1, v->next, s);
          hookupBL(v->next->next, vSub2, v->next->next, s);
        }
      break;
    }

  /* Refresh the vectors on both sides of the central branch, then score. */
  newviewGeneric(s->tr, u, FALSE);
  newviewGeneric(s->tr, u->back, FALSE);
  evaluateGeneric(s->tr, u, FALSE);

  return TRUE;
}
int main (int argc, char * argv[]) { pllAlignmentData *alignmentData1, *alignmentData2; pllInstance * tr, *tr2; pllNewickTree * newick; partitionList * partitions, *partitions2; struct pllQueue * parts; int i; if (argc != 4) { fprintf (stderr, "usage: %s [phylip-file] [newick-file] [partition-file]\n", argv[0]); return (EXIT_FAILURE); } /* Create a PLL tree */ tr = pllCreateInstance (GAMMA, PLL_FALSE, PLL_FALSE, PLL_FALSE, 12345); tr2 = pllCreateInstance (GAMMA, PLL_FALSE, PLL_FALSE, PLL_FALSE, 12345); /* Parse a PHYLIP file */ alignmentData1= pllParsePHYLIP (argv[1]); alignmentData2 = pllParsePHYLIP (argv[1]); if (!alignmentData1) { fprintf (stderr, "Error while parsing %s\n", argv[1]); return (EXIT_FAILURE); } /* Parse a NEWICK file */ newick = pllNewickParseFile (argv[2]); if (!newick) { fprintf (stderr, "Error while parsing newick file %s\n", argv[2]); return (EXIT_FAILURE); } if (!pllValidateNewick (newick)) /* check whether the valid newick tree is also a tree that can be processed with our nodeptr structure */ { fprintf (stderr, "Invalid phylogenetic tree\n"); return (EXIT_FAILURE); } /* Parse the partitions file into a partition queue structure */ parts = pllPartitionParse (argv[3]); /* Validate the partitions */ if (!pllPartitionsValidate (parts, alignmentData1)) { fprintf (stderr, "Error: Partitions do not cover all sites\n"); return (EXIT_FAILURE); } /* commit the partitions and build a partitions structure */ partitions = pllPartitionsCommit (parts, alignmentData1); partitions2 = pllPartitionsCommit (parts, alignmentData2); /* destroy the intermedia partition queue structure */ pllQueuePartitionsDestroy (&parts); /* eliminate duplicate sites from the alignment and update weights vector */ pllPhylipRemoveDuplicate (alignmentData1, partitions); pllPhylipRemoveDuplicate (alignmentData2, partitions2); /* Set the topology of the PLL tree from a parsed newick tree */ //pllTreeInitTopologyNewick (tr, newick, PLL_TRUE); /* Or instead of the previous function 
use the next commented line to create a random tree topology pllTreeInitTopologyRandom (tr, phylip->nTaxa, phylip->label); */ pllTreeInitTopologyForAlignment(tr, alignmentData1); /* Connect the alignment with the tree structure */ if (!pllLoadAlignment (tr, alignmentData1, partitions, PLL_DEEP_COPY)) { fprintf (stderr, "Incompatible tree/alignment combination\n"); return (EXIT_FAILURE); } /* Initialize the model TODO: Put the parameters in a logical order and change the TRUE to flags */ pllInitModel(tr, alignmentData1, partitions); /* TODO transform into pll functions !*/ /* allocateParsimonyDataStructures(tr, partitions); pllMakeParsimonyTreeFast(tr, partitions); pllFreeParsimonyDataStructures(tr, partitions); */ pllComputeRandomizedStepwiseAdditionParsimonyTree(tr, partitions); pllTreeToNewick (tr->tree_string, tr, partitions, tr->start->back, PLL_TRUE, PLL_TRUE, PLL_FALSE, PLL_FALSE, PLL_FALSE, PLL_SUMMARIZE_LH, PLL_FALSE, PLL_FALSE); printf ("Tree: %s %d\n", tr->tree_string, tr->start->number); evaluateGeneric(tr, partitions, tr->start, PLL_TRUE, PLL_FALSE); double firstTree = tr->likelihood; printf("%f \n", tr->likelihood); //computeBIGRAPID_Test(tr, partitions, PLL_TRUE); printf("final like %f\n", tr->likelihood); //pllInitModel(tr, PLL_TRUE, phylip, partitions); pllTreeInitTopologyNewick (tr2, newick, PLL_TRUE); if (!pllLoadAlignment (tr2, alignmentData2, partitions2, PLL_DEEP_COPY)) { fprintf (stderr, "Incompatible tree/alignment combination\n"); return (EXIT_FAILURE); } pllInitModel(tr2, alignmentData2, partitions2); pllTreeToNewick (tr2->tree_string, tr2, partitions2, tr2->start->back, PLL_TRUE, PLL_TRUE, PLL_FALSE, PLL_FALSE, PLL_FALSE, PLL_SUMMARIZE_LH, PLL_FALSE, PLL_FALSE); printf ("Tree: %s %d\n", tr2->tree_string, tr2->start->number); evaluateGeneric(tr2, partitions2, tr2->start, PLL_TRUE, PLL_FALSE); printf("%f \n", tr2->likelihood); double secondTree = tr2->likelihood; assert(firstTree == secondTree); pllOptimizeModelParameters(tr2, partitions2, 
10.0); printf("%f \n", tr2->likelihood); pllAlignmentDataDestroy (alignmentData1); pllNewickParseDestroy (&newick); pllPartitionsDestroy (tr, &partitions); pllTreeDestroy (tr); pllAlignmentDataDestroy (alignmentData2); pllPartitionsDestroy (&partitions2, tr2->mxtips); pllTreeDestroy (tr2); for(i = 0; i < 5; i++) { //write a simple partition file with 3 partitions //for dataset dna.phy.dat contained //in this source directory FILE *f = fopen("dummy", "w"); fprintf(f, "DNA, p1 = 1-200\n"); fprintf(f, "DNA, p1 = 201-400\n"); fprintf(f, "DNA, p1 = 401-705\n"); fclose(f); tr = pllCreateInstance (GAMMA, PLL_FALSE, PLL_FALSE, PLL_FALSE, 12345); alignmentData1= pllParsePHYLIP (argv[1]); newick = pllNewickParseFile (argv[2]); parts = pllPartitionParse ("dummy"); /* Validate the partitions */ if (!pllPartitionsValidate (parts, alignmentData1)) { fprintf (stderr, "Error: Partitions do not cover all sites\n"); return (EXIT_FAILURE); } /* commit the partitions and build a partitions structure */ partitions = pllPartitionsCommit (parts, alignmentData1); /* destroy the intermedia partition queue structure */ pllQueuePartitionsDestroy (&parts); /* eliminate duplicate sites from the alignment and update weights vector */ pllPhylipRemoveDuplicate (alignmentData1, partitions); pllTreeInitTopologyNewick (tr, newick, PLL_TRUE); if (!pllLoadAlignment (tr, alignmentData1, partitions, PLL_DEEP_COPY)) { fprintf (stderr, "Incompatible tree/alignment combination\n"); return (EXIT_FAILURE); } pllInitModel(tr, alignmentData1, partitions); switch(i) { case 0: //link params in one way pllLinkAlphaParameters("0,1,2", partitions); pllLinkFrequencies("0,1,2", partitions); pllLinkRates("0,1,2", partitions); break; case 1: //link params in another way pllLinkAlphaParameters("0,0,0", partitions); pllLinkFrequencies("0,1,2", partitions); pllLinkRates("0,1,2", partitions); break; case 2: //link params in yet another way pllLinkAlphaParameters("0,0,0", partitions); pllLinkFrequencies("0,1,2", 
partitions); pllLinkRates("0,1,0", partitions); break; case 3: //also fiddle around with the Q matrices, make them to be non-GTR, but simpler pllLinkAlphaParameters("0,1,2", partitions); pllLinkFrequencies("0,1,2", partitions); pllLinkRates("0,1,2", partitions); //these are GTR models pllSetSubstitutionRateMatrixSymmetries("0,1,2,3,4,5", partitions, 0); pllSetSubstitutionRateMatrixSymmetries("0,1,2,3,4,5", partitions, 1); //this is a simpler model with 5 parameters, parameter a and f have //the same value pllSetSubstitutionRateMatrixSymmetries("0,1,2,3,4,0", partitions, 2); break; case 4: { //test case to show how the model parameters can be set to fixed values // set up arrays of user-defined base frequencies // and a user defined q matrix double f[4] = {0.25, 0.25, 0.25, 0.25}, q[6] = {1.0, 1.0, 1.0, 1.0, 1.0, 0.5}; //unlink alpha parameters base frequencies and Q matrices //across all partitions pllLinkAlphaParameters("0,1,2", partitions); pllLinkFrequencies("0,0,1", partitions); pllLinkRates("0,1,2", partitions); //set alpha to a fixed value of 1.0 for partition 0 and //parition 1 pllSetFixedAlpha(1.0, 0, partitions, tr); pllSetFixedAlpha(1.0, 1, partitions, tr); //fix the base frequencies to 0.25 for //partitions 0 and 1 pllSetFixedBaseFrequencies(f, 4, 0, partitions, tr); pllSetFixedBaseFrequencies(f, 4, 1, partitions, tr); //set the Q matrix to fixed values for partition //0 pllSetFixedSubstitutionMatrix(q, 6, 0, partitions, tr); } break; default: assert(0); } evaluateGeneric(tr, partitions, tr->start, PLL_TRUE, PLL_FALSE); printf("%f \n", tr->likelihood); pllOptimizeModelParameters(tr, partitions, 10.0); //print the model parameters printModelParameters(partitions); printf("%f \n", tr->likelihood); //cleanup pllAlignmentDataDestroy (alignmentData1); pllNewickParseDestroy (&newick); pllPartitionsDestroy (&partitions, tr->mxtips); pllTreeDestroy (tr); } testProteinStuff(); return (EXIT_SUCCESS); }
static void testProteinStuff() { pllAlignmentData * alignmentData; pllInstance * tr; pllNewickTree * newick; partitionList * partitions; struct pllQueue * parts; int i; for(i = 0; i < 5; i++) { //write a simple partition file with 3 partitions //for dataset dna.phy.dat contained //in this source directory FILE *f = fopen("proteinPartitions", "w"); switch(i) { case 0: fprintf(f, "WAG, p1 = 1-200\n"); fprintf(f, "WAG, p2 = 201-600\n"); fprintf(f, "WAG, p3 = 601-1104\n"); break; case 1: fprintf(f, "LG, p1 = 1-200\n"); fprintf(f, "LG, p2 = 201-600\n"); fprintf(f, "LG, p3 = 601-1104\n"); break; case 2: fprintf(f, "JTT, p1 = 1-200\n"); fprintf(f, "JTT, p2 = 201-600\n"); fprintf(f, "JTT, p3 = 601-1104\n"); break; case 3: case 4: fprintf(f, "GTR, p1 = 1-200\n"); fprintf(f, "GTR, p2 = 201-600\n"); fprintf(f, "GTR, p3 = 601-1104\n"); break; default: assert(0); } fclose(f); tr = pllCreateInstance (GAMMA, PLL_FALSE, PLL_FALSE, PLL_FALSE, 12345); alignmentData = pllParsePHYLIP ("prot.phy"); /* or alternatively, parse a FASTA file */ // alignmentData = pllParseFASTA ("prot.phy"); newick = pllNewickParseFile("parsimonyTree"); parts = pllPartitionParse ("proteinPartitions"); /* Validate the partitions */ if (!pllPartitionsValidate (parts, alignmentData)) { fprintf (stderr, "Error: Partitions do not cover all sites\n"); return (EXIT_FAILURE); } /* commit the partitions and build a partitions structure */ partitions = pllPartitionsCommit (parts, alignmentData); /* destroy the intermedia partition queue structure */ pllQueuePartitionsDestroy (&parts); /* eliminate duplicate sites from the alignment and update weights vector */ pllPhylipRemoveDuplicate (alignmentData, partitions); pllTreeInitTopologyNewick (tr, newick, PLL_TRUE); if (!pllLoadAlignment (tr, alignmentData, partitions, PLL_DEEP_COPY)) { fprintf (stderr, "Incompatible tree/alignment combination\n"); return (EXIT_FAILURE); } //pllInitModel(tr, PLL_TRUE, alignmentData, partitions); pllInitModel(tr, alignmentData, 
partitions); switch(i) { case 0: //all params unlinked pllLinkAlphaParameters("0,1,2", partitions); pllLinkFrequencies("0,1,2", partitions); pllLinkRates("0,1,2", partitions); break; case 1: //link params in another way pllLinkAlphaParameters("0,0,0", partitions); pllLinkFrequencies("0,1,2", partitions); pllLinkRates("0,1,2", partitions); break; case 2: //link params in yet another way pllLinkAlphaParameters("0,0,0", partitions); pllLinkFrequencies("0,1,2", partitions); pllLinkRates("0,1,0", partitions); break; case 3: //also fiddle around with the Q matrices, make them to be non-GTR, but simpler pllLinkAlphaParameters("0,1,2", partitions); pllLinkFrequencies("0,1,2", partitions); pllLinkRates("0,1,2", partitions); //these are GTR models //pllSetSubstitutionRateMatrixSymmetries("0,1,2,3,4,5", partitions, 0); //pllSetSubstitutionRateMatrixSymmetries("0,1,2,3,4,5", partitions, 1); //this is a simpler model with 5 parameters, parameter a and f have //the same value //pllSetSubstitutionRateMatrixSymmetries("0,1,2,3,4,0", partitions, 2); break; case 4: { //test case to show how the model parameters can be set to fixed values // set up arrays of user-defined base frequencies // and a user defined q matrix double f[4] = {0.25, 0.25, 0.25, 0.25}, q[6] = {1.0, 1.0, 1.0, 1.0, 1.0, 0.5}; //unlink alpha parameters base frequencies and Q matrices //across all partitions pllLinkAlphaParameters("0,1,2", partitions); pllLinkFrequencies("0,1,2", partitions); pllLinkRates("0,0,0", partitions); //set alpha to a fixed value of 1.0 for partition 0 and //parition 1 //pllSetFixedAlpha(1.0, 0, partitions, tr); //pllSetFixedAlpha(1.0, 1, partitions, tr); //fix the base frequencies to 0.25 for //partitions 0 and 1 //pllSetFixedBaseFrequencies(f, 4, 0, partitions, tr); //pllSetFixedBaseFrequencies(f, 4, 1, partitions, tr); //set the Q matrix to fixed values for partition //0 //pllSetFixedSubstitutionMatrix(q, 6, 0, partitions, tr); } break; default: assert(0); } evaluateGeneric(tr, 
partitions, tr->start, PLL_TRUE, PLL_FALSE); printf("%f \n", tr->likelihood); pllOptimizeModelParameters(tr, partitions, 1.0); //print the model parameters //printModelParameters(partitions); printf("%f \n", tr->likelihood); //cleanup pllAlignmentDataDestroy (alignmentData); pllNewickParseDestroy (&newick); pllPartitionsDestroy (tr, &partitions); pllTreeDestroy (tr); } }
/*
 * Re-estimate the length of the branch between p and p->back.
 *
 * The current per-branch values are read from p->back->z, passed to
 * makenewzGeneric() as the starting point, and the result is written to
 * both ends of the branch for every partition that is not flagged in
 * tr->partitionConverged.  A partition whose value moved by more than
 * `deltaz` has its entry in tr->partitionSmoothed cleared.
 *
 * With _DEBUG_UPDATE defined, the likelihood is evaluated before and after,
 * and the function asserts if it dropped by more than 0.01.
 *
 * Always returns TRUE.
 */
boolean update(tree *tr, nodeptr p)
{
  nodeptr other;
  double oldZ[NUM_BRANCHES], newZ[NUM_BRANCHES];
  boolean smoothed[NUM_BRANCHES];
  double threshold;
  int k;

#ifdef _DEBUG_UPDATE
  double startLH;
  evaluateGeneric(tr, p);
  startLH = tr->likelihood;
#endif

  other = p->back;

  /* Starting values for the optimizer: the current branch lengths. */
  for(k = 0; k < tr->numBranches; k++)
    oldZ[k] = other->z[k];

  /* The last argument is TRUE only when there is more than one branch set. */
  makenewzGeneric(tr, p, other, oldZ, newzpercycle, newZ,
                  (tr->numBranches > 1) ? TRUE : FALSE);

  threshold = deltaz;

  for(k = 0; k < tr->numBranches; k++)
    {
      /* Work on a local copy; it is written back after the debug check. */
      smoothed[k] = tr->partitionSmoothed[k];

      if(tr->partitionConverged[k])
        continue;

      if(ABS(newZ[k] - oldZ[k]) > threshold)
        smoothed[k] = FALSE;

      p->z[k] = other->z[k] = newZ[k];
    }

#ifdef _DEBUG_UPDATE
  evaluateGeneric(tr, p);
  if(tr->likelihood <= startLH)
    {
      if(fabs(tr->likelihood - startLH) > 0.01)
        {
          printf("%f %f\n", startLH, tr->likelihood);
          assert(0);
        }
    }
#endif

  for(k = 0; k < tr->numBranches; k++)
    tr->partitionSmoothed[k] = smoothed[k];

  return TRUE;
}
int main(int argc, char * argv[]) { tree * tr; if (argc != 2) { fprintf (stderr, "syntax: %s [binary-alignment-file]\n", argv[0]); return (1); } tr = (tree *)malloc(sizeof(tree)); /* read the binary input, setup tree, initialize model with alignment */ read_msa(tr,argv[1]); tr->randomNumberSeed = 665; makeRandomTree(tr); printf("Number of taxa: %d\n", tr->mxtips); printf("Number of partitions: %d\n", tr->NumberOfModels); /* compute the LH of the full tree */ printf ("Virtual root: %d\n", tr->start->number); evaluateGeneric(tr, tr->start, TRUE); printf("Likelihood: %f\n", tr->likelihood); /* 8 rounds of branch length optimization */ smoothTree(tr, 1); evaluateGeneric(tr, tr->start, TRUE); printf("Likelihood after branch length optimization: %.20f\n", tr->likelihood); /* Now we show how to find a particular LH vector for a node */ int i; int node_number = tr->mxtips + 1; nodeptr p = tr->nodep[node_number]; printf("Pointing to node %d\n", p->number); /* Fix as VR */ newviewGeneric(tr, p, FALSE); newviewGeneric(tr, p->back, FALSE); evaluateGeneric(tr, p, FALSE); printf("Likelihood : %.f\n", tr->likelihood); printf("Make a copy of LH vector for node %d\n", p->number); likelihood_vector *vector = copy_likelihood_vectors(tr, p); for(i=0; i<vector->num_partitions; i++) printf("Partition %d requires %d bytes\n", i, (int)vector->partition_sizes[i]); /* Check we have the same vector in both tree and copied one */ assert(same_vector(tr, p, vector)); /* Now force the p to get a new value (generally branch lengths are NOT updated like this) */ /* This is just an example to show usage (for fast NNI eval), manually updating vectors is not recommended! 
*/ printf("bl : %.40f\n", p->next->z[0]); p->next->z[0] = p->next->back->z[0] = zmin; printf("bl : %.40f\n", p->next->z[0]); newviewGeneric(tr, p, FALSE); assert(!same_vector(tr, p, vector)); evaluateGeneric(tr, p, FALSE); printf("Likelihood : %f\n", tr->likelihood); restore_vector(tr, p, vector); assert(same_vector(tr, p, vector)); evaluateGeneric(tr, p, FALSE); printf("Likelihood after manually restoring the vector : %f\n", tr->likelihood); free_likelihood_vector(vector); /* Pick an inner branch */ printf("numBranches %d \n", tr->numBranches); //tr->numBranches = 1; p = tr->nodep[tr->mxtips + 1]; int partition_id = 0; /* single partition */ double bl = get_branch_length(tr, p, partition_id); printf("z value: %f , bl value %f\n", p->z[partition_id], bl); /* set the bl to 2.5 */ double new_bl = 2.5; set_branch_length(tr, p, partition_id, new_bl); printf("Changed BL to %f\n", new_bl); printf("new z value: %f , new bl value %f\n", p->z[partition_id], get_branch_length(tr, p, partition_id)); /* set back to original */ printf("Changed to previous BL\n"); set_branch_length(tr, p, partition_id, bl); printf("new z value: %f , new bl value %f\n", p->z[partition_id], get_branch_length(tr, p, partition_id)); return (0); }
/*
 * Undo a rejected substitution-rate proposal.
 *
 * Hands the rates saved in instate->curSubsRates back to restoreSubsRates()
 * for partition instate->model, then re-evaluates the likelihood from
 * tr->start with the last argument set to FALSE.
 */
static void resetSimpleModelProposal(state * instate)
{
  /* put the saved rates back in place */
  restoreSubsRates(instate->tr,
                   instate->adef,
                   instate->model,
                   instate->numSubsRates,
                   instate->curSubsRates);

  /* bring tr->likelihood back in line with the restored rates */
  evaluateGeneric(instate->tr,
                  instate->tr->start,
                  FALSE);
}