void doAllInOne(tree *tr, analdef *adef) { int i, n, bestIndex, bootstrapsPerformed; #ifdef _WAYNE_MPI int bootStopTests = 1, j, bootStrapsPerProcess = 0; #endif double loopTime; int *originalRateCategories; int *originalInvariant; #ifdef _WAYNE_MPI int slowSearches, fastEvery; #else int slowSearches, fastEvery = 5; #endif int treeVectorLength = -1; topolRELL_LIST *rl; double bestLH, mlTime, overallTime; long radiusSeed = adef->rapidBoot; FILE *f; char bestTreeFileName[1024]; hashtable *h = (hashtable*)NULL; unsigned int **bitVectors = (unsigned int**)NULL; boolean bootStopIt = FALSE; double pearsonAverage = 0.0; pInfo *catParams = allocParams(tr); pInfo *gammaParams = allocParams(tr); unsigned int vLength; n = adef->multipleRuns; #ifdef _WAYNE_MPI if(n % processes != 0) n = processes * ((n / processes) + 1); #endif if(adef->bootStopping) { h = initHashTable(tr->mxtips * 100); treeVectorLength = adef->multipleRuns; bitVectors = initBitVector(tr, &vLength); } rl = (topolRELL_LIST *)rax_malloc(sizeof(topolRELL_LIST)); initTL(rl, tr, n); originalRateCategories = (int*)rax_malloc(tr->cdta->endsite * sizeof(int)); originalInvariant = (int*)rax_malloc(tr->cdta->endsite * sizeof(int)); initModel(tr, tr->rdta, tr->cdta, adef); if(adef->grouping) printBothOpen("\n\nThe topologies of all Bootstrap and ML trees will adhere to the constraint tree specified in %s\n", tree_file); if(adef->constraint) printBothOpen("\n\nThe topologies of all Bootstrap and ML trees will adhere to the bifurcating backbone constraint tree specified in %s\n", tree_file); #ifdef _WAYNE_MPI long parsimonySeed0 = adef->parsimonySeed; long replicateSeed0 = adef->rapidBoot; n = n / processes; #endif for(i = 0; i < n && !bootStopIt; i++) { #ifdef _WAYNE_MPI j = i + n * processID; tr->treeID = j; #else tr->treeID = i; #endif tr->checkPointCounter = 0; loopTime = gettime(); #ifdef _WAYNE_MPI if(i == 0) { if(parsimonySeed0 != 0) adef->parsimonySeed = parsimonySeed0 + 10000 * processID; adef->rapidBoot = replicateSeed0 + 10000 * processID; radiusSeed = adef->rapidBoot; } #endif if(i % 10 == 0) { if(i > 0) reductionCleanup(tr, originalRateCategories, originalInvariant); if(adef->grouping || adef->constraint) { FILE *f = myfopen(tree_file, "rb"); assert(adef->restart); if (! treeReadLenMULT(f, tr, adef)) exit(-1); fclose(f); } else makeParsimonyTree(tr, adef); tr->likelihood = unlikely; if(i == 0) { double t; onlyInitrav(tr, tr->start); treeEvaluate(tr, 1); t = gettime(); modOpt(tr, adef, FALSE, 5.0); #ifdef _WAYNE_MPI printBothOpen("\nTime for BS model parameter optimization on Process %d: %f seconds\n", processID, gettime() - t); #else printBothOpen("\nTime for BS model parameter optimization %f\n", gettime() - t); #endif memcpy(originalRateCategories, tr->cdta->rateCategory, sizeof(int) * tr->cdta->endsite); memcpy(originalInvariant, tr->invariant, sizeof(int) * tr->cdta->endsite); if(adef->bootstrapBranchLengths) { if(tr->rateHetModel == CAT) { copyParams(tr->NumberOfModels, catParams, tr->partitionData, tr); assert(tr->cdta->endsite == tr->originalCrunchedLength); catToGamma(tr, adef); modOpt(tr, adef, TRUE, adef->likelihoodEpsilon); copyParams(tr->NumberOfModels, gammaParams, tr->partitionData, tr); gammaToCat(tr); copyParams(tr->NumberOfModels, tr->partitionData, catParams, tr); } else { assert(tr->cdta->endsite == tr->originalCrunchedLength); } } } } computeNextReplicate(tr, &adef->rapidBoot, originalRateCategories, originalInvariant, TRUE, TRUE); resetBranches(tr); evaluateGenericInitrav(tr, tr->start); treeEvaluate(tr, 1); computeBOOTRAPID(tr, adef, &radiusSeed); #ifdef _WAYNE_MPI saveTL(rl, tr, j); #else saveTL(rl, tr, i); #endif if(adef->bootstrapBranchLengths) { double lh = tr->likelihood; if(tr->rateHetModel == CAT) { copyParams(tr->NumberOfModels, tr->partitionData, gammaParams, tr); catToGamma(tr, adef); resetBranches(tr); onlyInitrav(tr, tr->start); treeEvaluate(tr, 2.0); gammaToCat(tr); copyParams(tr->NumberOfModels, tr->partitionData, catParams, tr); tr->likelihood = lh; } else { treeEvaluate(tr, 2.0); tr->likelihood = lh; } } printBootstrapResult(tr, adef, TRUE); loopTime = gettime() - loopTime; writeInfoFile(adef, tr, loopTime); if(adef->bootStopping) #ifdef _WAYNE_MPI { int nn = (i + 1) * processes; if((nn > START_BSTOP_TEST) && (i * processes < FC_SPACING * bootStopTests) && ((i + 1) * processes >= FC_SPACING * bootStopTests) ) { MPI_Barrier(MPI_COMM_WORLD); concatenateBSFiles(processes, bootstrapFileName); MPI_Barrier(MPI_COMM_WORLD); bootStopIt = computeBootStopMPI(tr, bootstrapFileName, adef, &pearsonAverage); bootStopTests++; } } #else bootStopIt = bootStop(tr, h, i, &pearsonAverage, bitVectors, treeVectorLength, vLength, adef); #endif } #ifdef _WAYNE_MPI MPI_Barrier(MPI_COMM_WORLD); bootstrapsPerformed = i * processes; bootStrapsPerProcess = i; concatenateBSFiles(processes, bootstrapFileName); removeBSFiles(processes, bootstrapFileName); MPI_Barrier(MPI_COMM_WORLD); #else bootstrapsPerformed = i; #endif rax_freeParams(tr->NumberOfModels, catParams); rax_free(catParams); rax_freeParams(tr->NumberOfModels, gammaParams); rax_free(gammaParams); if(adef->bootStopping) { freeBitVectors(bitVectors, 2 * tr->mxtips); rax_free(bitVectors); freeHashTable(h); rax_free(h); } { double t; printBothOpenMPI("\n\n"); if(adef->bootStopping) { if(bootStopIt) { switch(tr->bootStopCriterion) { case FREQUENCY_STOP: printBothOpenMPI("Stopped Rapid BS search after %d replicates with FC Bootstopping criterion\n", bootstrapsPerformed); printBothOpenMPI("Pearson Average of %d random splits: %f\n",BOOTSTOP_PERMUTATIONS , pearsonAverage); break; case MR_STOP: printBothOpenMPI("Stopped Rapid BS search after %d replicates with MR-based Bootstopping criterion\n", bootstrapsPerformed); printBothOpenMPI("WRF Average of %d random splits: %f\n", BOOTSTOP_PERMUTATIONS, pearsonAverage); break; case MRE_STOP: printBothOpenMPI("Stopped Rapid BS search after %d replicates with MRE-based Bootstopping criterion\n", bootstrapsPerformed); printBothOpenMPI("WRF Average of %d random splits: %f\n", BOOTSTOP_PERMUTATIONS, pearsonAverage); break; case MRE_IGN_STOP: printBothOpenMPI("Stopped Rapid BS search after %d replicates with MRE_IGN-based Bootstopping criterion\n", bootstrapsPerformed); printBothOpenMPI("WRF Average of %d random splits: %f\n", BOOTSTOP_PERMUTATIONS, pearsonAverage); break; default: assert(0); } } else { switch(tr->bootStopCriterion) { case FREQUENCY_STOP: printBothOpenMPI("Rapid BS search did not converge after %d replicates with FC Bootstopping criterion\n", bootstrapsPerformed); printBothOpenMPI("Pearson Average of %d random splits: %f\n",BOOTSTOP_PERMUTATIONS , pearsonAverage); break; case MR_STOP: printBothOpenMPI("Rapid BS search did not converge after %d replicates with MR-based Bootstopping criterion\n", bootstrapsPerformed); printBothOpenMPI("WRF Average of %d random splits: %f\n", BOOTSTOP_PERMUTATIONS, pearsonAverage); break; case MRE_STOP: printBothOpenMPI("Rapid BS search did not converge after %d replicates with MRE-based Bootstopping criterion\n", bootstrapsPerformed); printBothOpenMPI("WRF Average of %d random splits: %f\n", BOOTSTOP_PERMUTATIONS, pearsonAverage); break; case MRE_IGN_STOP: printBothOpenMPI("Rapid BS search did not converge after %d replicates with MR_IGN-based Bootstopping criterion\n", bootstrapsPerformed); printBothOpenMPI("WRF Average of %d random splits: %f\n", BOOTSTOP_PERMUTATIONS, pearsonAverage); break; default: assert(0); } } } t = gettime() - masterTime; printBothOpenMPI("Overall Time for %d Rapid Bootstraps %f seconds\n", bootstrapsPerformed, t); printBothOpenMPI("Average Time per Rapid Bootstrap %f seconds\n", (double)(t/((double)bootstrapsPerformed))); if(!adef->allInOne) { printBothOpenMPI("All %d bootstrapped trees written to: %s\n", bootstrapsPerformed, bootstrapFileName); #ifdef _WAYNE_MPI MPI_Finalize(); #endif exit(0); } } /* ML-search */ mlTime = gettime(); double t = mlTime; printBothOpenMPI("\nStarting ML Search ...\n\n"); /***CLEAN UP reduction stuff */ reductionCleanup(tr, originalRateCategories, originalInvariant); /****/ #ifdef _WAYNE_MPI restoreTL(rl, tr, n * processID); #else restoreTL(rl, tr, 0); #endif resetBranches(tr); evaluateGenericInitrav(tr, tr->start); modOpt(tr, adef, TRUE, adef->likelihoodEpsilon); #ifdef _WAYNE_MPI if(bootstrapsPerformed <= 100) fastEvery = 5; else fastEvery = bootstrapsPerformed / 20; for(i = 0; i < bootstrapsPerformed; i++) rl->t[i]->likelihood = unlikely; for(i = 0; i < bootStrapsPerProcess; i++) { j = i + n * processID; if(i % fastEvery == 0) { restoreTL(rl, tr, j); resetBranches(tr); evaluateGenericInitrav(tr, tr->start); treeEvaluate(tr, 1); optimizeRAPID(tr, adef); saveTL(rl, tr, j); } } #else for(i = 0; i < bootstrapsPerformed; i++) { rl->t[i]->likelihood = unlikely; if(i % fastEvery == 0) { restoreTL(rl, tr, i); resetBranches(tr); evaluateGenericInitrav(tr, tr->start); treeEvaluate(tr, 1); optimizeRAPID(tr, adef); saveTL(rl, tr, i); } } #endif printBothOpenMPI("Fast ML optimization finished\n\n"); t = gettime() - t; #ifdef _WAYNE_MPI printBothOpen("Fast ML search on Process %d: Time %f seconds\n\n", processID, t); j = n * processID; qsort(&(rl->t[j]), n, sizeof(topolRELL*), compareTopolRell); restoreTL(rl, tr, j); #else printBothOpen("Fast ML search Time: %f seconds\n\n", t); qsort(&(rl->t[0]), bootstrapsPerformed, sizeof(topolRELL*), compareTopolRell); restoreTL(rl, tr, 0); #endif t = gettime(); resetBranches(tr); evaluateGenericInitrav(tr, tr->start); modOpt(tr, adef, TRUE, adef->likelihoodEpsilon); slowSearches = bootstrapsPerformed / 5; if(bootstrapsPerformed % 5 != 0) slowSearches++; slowSearches = MIN(slowSearches, 10); #ifdef _WAYNE_MPI if(processes > 1) { if(slowSearches % processes == 0) slowSearches = slowSearches / processes; else slowSearches = (slowSearches / processes) + 1; } for(i = 0; i < slowSearches; i++) { j = i + n * processID; restoreTL(rl, tr, j); rl->t[j]->likelihood = unlikely; evaluateGenericInitrav(tr, tr->start); treeEvaluate(tr, 1.0); thoroughOptimization(tr, adef, rl, j); } #else for(i = 0; i < slowSearches; i++) { restoreTL(rl, tr, i); rl->t[i]->likelihood = unlikely; evaluateGenericInitrav(tr, tr->start); treeEvaluate(tr, 1.0); thoroughOptimization(tr, adef, rl, i); } #endif /*************************************************************************************************************/ if(tr->rateHetModel == CAT) { catToGamma(tr, adef); modOpt(tr, adef, TRUE, adef->likelihoodEpsilon); } bestIndex = -1; bestLH = unlikely; #ifdef _WAYNE_MPI for(i = 0; i < slowSearches; i++) { j = i + n * processID; restoreTL(rl, tr, j); resetBranches(tr); evaluateGenericInitrav(tr, tr->start); treeEvaluate(tr, 2); printBothOpen("Slow ML Search %d Likelihood: %f\n", j, tr->likelihood); if(tr->likelihood > bestLH) { bestLH = tr->likelihood; bestIndex = j; } } /*printf("processID = %d, bestIndex = %d; bestLH = %f\n", processID, bestIndex, bestLH);*/ #else for(i = 0; i < slowSearches; i++) { restoreTL(rl, tr, i); resetBranches(tr); evaluateGenericInitrav(tr, tr->start); treeEvaluate(tr, 2); printBothOpen("Slow ML Search %d Likelihood: %f\n", i, tr->likelihood); if(tr->likelihood > bestLH) { bestLH = tr->likelihood; bestIndex = i; } } #endif printBothOpenMPI("Slow ML optimization finished\n\n"); t = gettime() - t; #ifdef _WAYNE_MPI printBothOpen("Slow ML search on Process %d: Time %f seconds\n", processID, t); #else printBothOpen("Slow ML search Time: %f seconds\n", t); #endif t = gettime(); restoreTL(rl, tr, bestIndex); resetBranches(tr); evaluateGenericInitrav(tr, tr->start); treeEvaluate(tr, 2); Thorough = 1; tr->doCutoff = FALSE; treeOptimizeThorough(tr, 1, 10); evaluateGenericInitrav(tr, tr->start); modOpt(tr, adef, TRUE, adef->likelihoodEpsilon); t = gettime() - t; #ifdef _WAYNE_MPI printBothOpen("Thorough ML search on Process %d: Time %f seconds\n", processID, t); #else printBothOpen("Thorough ML search Time: %f seconds\n", t); #endif #ifdef _WAYNE_MPI bestLH = tr->likelihood; printf("\nprocessID = %d, bestLH = %f\n", processID, bestLH); if(processes > 1) { double *buffer; int bestProcess; buffer = (double *)rax_malloc(sizeof(double) * processes); for(i = 0; i < processes; i++) buffer[i] = unlikely; buffer[processID] = bestLH; for(i = 0; i < processes; i++) MPI_Bcast(&buffer[i], 1, MPI_DOUBLE, i, MPI_COMM_WORLD); bestLH = buffer[0]; bestProcess = 0; for(i = 1; i < processes; i++) if(buffer[i] > bestLH) { bestLH = buffer[i]; bestProcess = i; } rax_free(buffer); if(processID != bestProcess) { MPI_Finalize(); exit(0); } } #endif printBothOpen("\nFinal ML Optimization Likelihood: %f\n", tr->likelihood); printBothOpen("\nModel Information:\n\n"); printModelParams(tr, adef); strcpy(bestTreeFileName, workdir); strcat(bestTreeFileName, "RAxML_bestTree."); strcat(bestTreeFileName, run_id); Tree2String(tr->tree_string, tr, tr->start->back, TRUE, TRUE, FALSE, FALSE, TRUE, adef, SUMMARIZE_LH, FALSE, FALSE, FALSE, FALSE); f = myfopen(bestTreeFileName, "wb"); fprintf(f, "%s", tr->tree_string); fclose(f); if(adef->perGeneBranchLengths) printTreePerGene(tr, adef, bestTreeFileName, "w"); overallTime = gettime() - masterTime; mlTime = gettime() - mlTime; printBothOpen("\nML search took %f secs or %f hours\n", mlTime, mlTime / 3600.0); printBothOpen("\nCombined Bootstrap and ML search took %f secs or %f hours\n", overallTime, overallTime / 3600.0); printBothOpen("\nDrawing Bootstrap Support Values on best-scoring ML tree ...\n\n"); freeTL(rl); rax_free(rl); calcBipartitions(tr, adef, bestTreeFileName, bootstrapFileName); overallTime = gettime() - masterTime; printBothOpen("Program execution info written to %s\n", infoFileName); printBothOpen("All %d bootstrapped trees written to: %s\n\n", bootstrapsPerformed, bootstrapFileName); printBothOpen("Best-scoring ML tree written to: %s\n\n", bestTreeFileName); if(adef->perGeneBranchLengths && tr->NumberOfModels > 1) printBothOpen("Per-Partition branch lengths of best-scoring ML tree written to %s.PARTITION.0 to %s.PARTITION.%d\n\n", bestTreeFileName, bestTreeFileName, tr->NumberOfModels - 1); printBothOpen("Best-scoring ML tree with support values written to: %s\n\n", bipartitionsFileName); printBothOpen("Best-scoring ML tree with support values as branch labels written to: %s\n\n", bipartitionsFileNameBranchLabels); printBothOpen("Overall execution time for full ML analysis: %f secs or %f hours or %f days\n\n", overallTime, overallTime/3600.0, overallTime/86400.0); #ifdef _WAYNE_MPI MPI_Finalize(); #endif exit(0); }
void computeBIGRAPID (tree *tr, analdef *adef, boolean estimateModel) { unsigned int vLength = 0; int i, impr, bestTrav, rearrangementsMax = 0, rearrangementsMin = 0, thoroughIterations = 0, fastIterations = 0; double lh, previousLh, difference, epsilon; bestlist *bestT, *bt; #ifdef _TERRACES /* store the 20 best trees found in a dedicated list */ bestlist *terrace; /* output file names */ char terraceFileName[1024], buf[64]; #endif hashtable *h = (hashtable*)NULL; unsigned int **bitVectors = (unsigned int**)NULL; if(tr->searchConvergenceCriterion) { bitVectors = initBitVector(tr, &vLength); h = initHashTable(tr->mxtips * 4); } bestT = (bestlist *) rax_malloc(sizeof(bestlist)); bestT->ninit = 0; initBestTree(bestT, 1, tr->mxtips); bt = (bestlist *) rax_malloc(sizeof(bestlist)); bt->ninit = 0; initBestTree(bt, 20, tr->mxtips); #ifdef _TERRACES /* initialize the tree list and the output file name for the current tree search/replicate */ terrace = (bestlist *) rax_malloc(sizeof(bestlist)); terrace->ninit = 0; initBestTree(terrace, 20, tr->mxtips); sprintf(buf, "%d", bCount); strcpy(terraceFileName, workdir); strcat(terraceFileName, "RAxML_terrace."); strcat(terraceFileName, run_id); strcat(terraceFileName, ".BS."); strcat(terraceFileName, buf); printf("%s\n", terraceFileName); #endif initInfoList(50); difference = 10.0; epsilon = 0.01; Thorough = 0; if(estimateModel) { if(adef->useBinaryModelFile) { readBinaryModel(tr); evaluateGenericInitrav(tr, tr->start); treeEvaluate(tr, 2); } else { evaluateGenericInitrav(tr, tr->start); modOpt(tr, adef, FALSE, 10.0); } } else treeEvaluate(tr, 2); printLog(tr, adef, FALSE); saveBestTree(bestT, tr); if(!adef->initialSet) bestTrav = adef->bestTrav = determineRearrangementSetting(tr, adef, bestT, bt); else bestTrav = adef->bestTrav = adef->initial; if(estimateModel) { if(adef->useBinaryModelFile) treeEvaluate(tr, 2); else { evaluateGenericInitrav(tr, tr->start); modOpt(tr, adef, FALSE, 5.0); } } else treeEvaluate(tr, 1); saveBestTree(bestT, tr); impr = 1; if(tr->doCutoff) tr->itCount = 0; while(impr) { recallBestTree(bestT, 1, tr); if(tr->searchConvergenceCriterion) { int bCounter = 0; if(fastIterations > 1) cleanupHashTable(h, (fastIterations % 2)); bitVectorInitravSpecial(bitVectors, tr->nodep[1]->back, tr->mxtips, vLength, h, fastIterations % 2, BIPARTITIONS_RF, (branchInfo *)NULL, &bCounter, 1, FALSE, FALSE); assert(bCounter == tr->mxtips - 3); if(fastIterations > 0) { double rrf = convergenceCriterion(h, tr->mxtips); if(rrf <= 0.01) /* 1% cutoff */ { printBothOpen("ML fast search converged at fast SPR cycle %d with stopping criterion\n", fastIterations); printBothOpen("Relative Robinson-Foulds (RF) distance between respective best trees after one succseful SPR cycle: %f%s\n", rrf, "%"); cleanupHashTable(h, 0); cleanupHashTable(h, 1); goto cleanup_fast; } else printBothOpen("ML search convergence criterion fast cycle %d->%d Relative Robinson-Foulds %f\n", fastIterations - 1, fastIterations, rrf); } } fastIterations++; treeEvaluate(tr, 1.0); saveBestTree(bestT, tr); printLog(tr, adef, FALSE); printResult(tr, adef, FALSE); lh = previousLh = tr->likelihood; treeOptimizeRapid(tr, 1, bestTrav, adef, bt); impr = 0; for(i = 1; i <= bt->nvalid; i++) { recallBestTree(bt, i, tr); treeEvaluate(tr, 0.25); difference = ((tr->likelihood > previousLh)? tr->likelihood - previousLh: previousLh - tr->likelihood); if(tr->likelihood > lh && difference > epsilon) { impr = 1; lh = tr->likelihood; saveBestTree(bestT, tr); } } } if(tr->searchConvergenceCriterion) { cleanupHashTable(h, 0); cleanupHashTable(h, 1); } cleanup_fast: Thorough = 1; impr = 1; recallBestTree(bestT, 1, tr); if(estimateModel) { if(adef->useBinaryModelFile) treeEvaluate(tr, 2); else { evaluateGenericInitrav(tr, tr->start); modOpt(tr, adef, FALSE, 1.0); } } else treeEvaluate(tr, 1.0); while(1) { recallBestTree(bestT, 1, tr); if(impr) { printResult(tr, adef, FALSE); rearrangementsMin = 1; rearrangementsMax = adef->stepwidth; if(tr->searchConvergenceCriterion) { int bCounter = 0; if(thoroughIterations > 1) cleanupHashTable(h, (thoroughIterations % 2)); bitVectorInitravSpecial(bitVectors, tr->nodep[1]->back, tr->mxtips, vLength, h, thoroughIterations % 2, BIPARTITIONS_RF, (branchInfo *)NULL, &bCounter, 1, FALSE, FALSE); assert(bCounter == tr->mxtips - 3); if(thoroughIterations > 0) { double rrf = convergenceCriterion(h, tr->mxtips); if(rrf <= 0.01) /* 1% cutoff */ { printBothOpen("ML search converged at thorough SPR cycle %d with stopping criterion\n", thoroughIterations); printBothOpen("Relative Robinson-Foulds (RF) distance between respective best trees after one succseful SPR cycle: %f%s\n", rrf, "%"); goto cleanup; } else printBothOpen("ML search convergence criterion thorough cycle %d->%d Relative Robinson-Foulds %f\n", thoroughIterations - 1, thoroughIterations, rrf); } } thoroughIterations++; } else { rearrangementsMax += adef->stepwidth; rearrangementsMin += adef->stepwidth; if(rearrangementsMax > adef->max_rearrange) goto cleanup; } treeEvaluate(tr, 1.0); previousLh = lh = tr->likelihood; saveBestTree(bestT, tr); printLog(tr, adef, FALSE); treeOptimizeRapid(tr, rearrangementsMin, rearrangementsMax, adef, bt); impr = 0; for(i = 1; i <= bt->nvalid; i++) { recallBestTree(bt, i, tr); treeEvaluate(tr, 0.25); #ifdef _TERRACES /* save all 20 best trees in the terrace tree list */ saveBestTree(terrace, tr); #endif difference = ((tr->likelihood > previousLh)? tr->likelihood - previousLh: previousLh - tr->likelihood); if(tr->likelihood > lh && difference > epsilon) { impr = 1; lh = tr->likelihood; saveBestTree(bestT, tr); } } } cleanup: #ifdef _TERRACES { double bestLH = tr->likelihood; FILE *f = myfopen(terraceFileName, "w"); /* print out likelihood of best tree found */ printf("best tree: %f\n", tr->likelihood); /* print out likelihoods of 20 best trees found during the tree search */ for(i = 1; i <= terrace->nvalid; i++) { recallBestTree(terrace, i, tr); /* if the likelihood scores are smaller than some epsilon 0.000001 print the tree to file */ if(ABS(bestLH - tr->likelihood) < 0.000001) { printf("%d %f\n", i, tr->likelihood); Tree2String(tr->tree_string, tr, tr->start->back, FALSE, TRUE, FALSE, FALSE, FALSE, adef, NO_BRANCHES, FALSE, FALSE, FALSE, FALSE); fprintf(f, "%s\n", tr->tree_string); } } fclose(f); /* increment tree search counter */ bCount++; } #endif if(tr->searchConvergenceCriterion) { freeBitVectors(bitVectors, 2 * tr->mxtips); rax_free(bitVectors); freeHashTable(h); rax_free(h); } freeBestTree(bestT); rax_free(bestT); freeBestTree(bt); rax_free(bt); #ifdef _TERRACES /* free terrace tree list */ freeBestTree(terrace); rax_free(terrace); #endif freeInfoList(); printLog(tr, adef, FALSE); printResult(tr, adef, FALSE); }
void doBootstrap(tree *tr, analdef *adef, rawdata *rdta, cruncheddata *cdta) { int bootstrapsPerformed, i, n, treeVectorLength = -1; unsigned int vLength = 0; #ifdef _WAYNE_MPI int j, bootStopTests = 1; #endif double loopTime, pearsonAverage; hashtable *h = (hashtable*)NULL; unsigned int **bitVectors = (unsigned int **)NULL; boolean bootStopIt = FALSE; n = adef->multipleRuns; #ifdef _WAYNE_MPI if(n % processes != 0) n = processes * ((n / processes) + 1); adef->multipleRuns = n; #endif if(adef->bootStopping) { h = initHashTable(tr->mxtips * 100); bitVectors = initBitVector(tr, &vLength); treeVectorLength = adef->multipleRuns; } #ifdef _WAYNE_MPI long parsimonySeed0 = adef->parsimonySeed; long replicateSeed0 = adef->rapidBoot; long bootstrapSeed0 = adef->boot; n = n / processes; #endif for(i = 0; i < n && !bootStopIt; i++) { loopTime = gettime(); #ifdef _WAYNE_MPI if(i == 0) { if(parsimonySeed0 != 0) adef->parsimonySeed = parsimonySeed0 + 10000 * processID; adef->rapidBoot = replicateSeed0 + 10000 * processID; adef->boot = bootstrapSeed0 + 10000 * processID; } j = i + n*processID; singleBootstrap(tr, j, adef, rdta, cdta); #else singleBootstrap(tr, i, adef, rdta, cdta); #endif loopTime = gettime() - loopTime; writeInfoFile(adef, tr, loopTime); if(adef->bootStopping) #ifdef _WAYNE_MPI { int nn = (i + 1) * processes; if((nn > START_BSTOP_TEST) && (i * processes < FC_SPACING * bootStopTests) && ((i + 1) * processes >= FC_SPACING * bootStopTests) ) { MPI_Barrier(MPI_COMM_WORLD); concatenateBSFiles(processes, bootstrapFileName); MPI_Barrier(MPI_COMM_WORLD); bootStopIt = computeBootStopMPI(tr, bootstrapFileName, adef, &pearsonAverage); bootStopTests++; } } #else bootStopIt = bootStop(tr, h, i, &pearsonAverage, bitVectors, treeVectorLength, vLength, adef); #endif } #ifdef _WAYNE_MPI MPI_Barrier(MPI_COMM_WORLD); bootstrapsPerformed = i * processes; if(processID == 0) { if(!adef->bootStopping) concatenateBSFiles(processes, bootstrapFileName); removeBSFiles(processes, bootstrapFileName); } MPI_Barrier(MPI_COMM_WORLD); #else bootstrapsPerformed = i; #endif adef->multipleRuns = bootstrapsPerformed; if(adef->bootStopping) { freeBitVectors(bitVectors, 2 * tr->mxtips); rax_free(bitVectors); freeHashTable(h); rax_free(h); if(bootStopIt) { switch(tr->bootStopCriterion) { case FREQUENCY_STOP: printBothOpenMPI("Stopped Standard BS search after %d replicates with FC Bootstopping criterion\n", bootstrapsPerformed); printBothOpenMPI("Pearson Average of %d random splits: %f\n",BOOTSTOP_PERMUTATIONS , pearsonAverage); break; case MR_STOP: printBothOpenMPI("Stopped Standard BS search after %d replicates with MR-based Bootstopping criterion\n", bootstrapsPerformed); printBothOpenMPI("WRF Average of %d random splits: %f\n", BOOTSTOP_PERMUTATIONS, pearsonAverage); break; case MRE_STOP: printBothOpenMPI("Stopped Standard BS search after %d replicates with MRE-based Bootstopping criterion\n", bootstrapsPerformed); printBothOpenMPI("WRF Average of %d random splits: %f\n", BOOTSTOP_PERMUTATIONS, pearsonAverage); break; case MRE_IGN_STOP: printBothOpenMPI("Stopped Standard BS search after %d replicates with MRE_IGN-based Bootstopping criterion\n", bootstrapsPerformed); printBothOpenMPI("WRF Average of %d random splits: %f\n", BOOTSTOP_PERMUTATIONS, pearsonAverage); break; default: assert(0); } } else { switch(tr->bootStopCriterion) { case FREQUENCY_STOP: printBothOpenMPI("Standard BS search did not converge after %d replicates with FC Bootstopping criterion\n", bootstrapsPerformed); printBothOpenMPI("Pearson Average of %d random splits: %f\n",BOOTSTOP_PERMUTATIONS , pearsonAverage); break; case MR_STOP: printBothOpenMPI("Standard BS search did not converge after %d replicates with MR-based Bootstopping criterion\n", bootstrapsPerformed); printBothOpenMPI("WRF Average of %d random splits: %f\n", BOOTSTOP_PERMUTATIONS, pearsonAverage); break; case MRE_STOP: printBothOpenMPI("Standard BS search did not converge after %d replicates with MRE-based Bootstopping criterion\n", bootstrapsPerformed); printBothOpenMPI("WRF Average of %d random splits: %f\n", BOOTSTOP_PERMUTATIONS, pearsonAverage); break; case MRE_IGN_STOP: printBothOpenMPI("Standard BS search did not converge after %d replicates with MR_IGN-based Bootstopping criterion\n", bootstrapsPerformed); printBothOpenMPI("WRF Average of %d random splits: %f\n", BOOTSTOP_PERMUTATIONS, pearsonAverage); break; default: assert(0); } } } }
void plausibilityChecker(tree *tr, analdef *adef) { FILE *treeFile, *rfFile; tree *smallTree = (tree *)rax_malloc(sizeof(tree)); char rfFileName[1024]; /* init hash table for big reference tree */ hashtable *h = initHashTable(tr->mxtips * 2 * 2); /* init the bit vectors we need for computing and storing bipartitions during the tree traversal */ unsigned int vLength, **bitVectors = initBitVector(tr, &vLength); int numberOfTreesAnalyzed = 0, branchCounter = 0, i; double avgRF = 0.0; /* set up an output file name */ strcpy(rfFileName, workdir); strcat(rfFileName, "RAxML_RF-Distances."); strcat(rfFileName, run_id); rfFile = myfopen(rfFileName, "wb"); assert(adef->mode == PLAUSIBILITY_CHECKER); /* open the big reference tree file and parse it */ treeFile = myfopen(tree_file, "r"); printBothOpen("Parsing reference tree %s\n", tree_file); treeReadLen(treeFile, tr, FALSE, TRUE, TRUE, adef, TRUE, FALSE); assert(tr->mxtips == tr->ntips); printBothOpen("The reference tree has %d tips\n", tr->ntips); fclose(treeFile); /* extract all induced bipartitions from the big tree and store them in the hastable */ bitVectorInitravSpecial(bitVectors, tr->nodep[1]->back, tr->mxtips, vLength, h, 0, BIPARTITIONS_RF, (branchInfo *)NULL, &branchCounter, 1, FALSE, FALSE); assert(branchCounter == tr->mxtips - 3); /* now see how many small trees we have */ treeFile = getNumberOfTrees(tr, bootStrapFile, adef); checkTreeNumber(tr->numberOfTrees, bootStrapFile); /* allocate a data structure for parsing the potentially mult-furcating tree */ allocateMultifurcations(tr, smallTree); /* loop over all small trees */ for(i = 0; i < tr->numberOfTrees; i++) { int numberOfSplits = readMultifurcatingTree(treeFile, smallTree, adef, TRUE); if(numberOfSplits > 0) { unsigned int entryCount = 0, k, j, *masked = (unsigned int *)rax_calloc(vLength, sizeof(unsigned int)), *smallTreeMask = (unsigned int *)rax_calloc(vLength, sizeof(unsigned int)); hashtable *rehash = initHashTable(tr->mxtips * 2 * 2); double rf, maxRF; int bCounter = 0, bips, firstTaxon, taxa = 0; if(numberOfTreesAnalyzed % 100 == 0) printBothOpen("Small tree %d has %d tips and %d bipartitions\n", i, smallTree->ntips, numberOfSplits); /* compute the maximum RF distance for computing the relative RF distance later-on */ /* note that here we need to pay attention, since the RF distance is not normalized by 2 * (n-3) but we need to account for the fact that the multifurcating small tree will potentially contain less bipartitions. Hence the normalization factor is obtained as 2 * numberOfSplits, where numberOfSplits is the number of bipartitions in the small tree. */ maxRF = (double)(2 * numberOfSplits); /* now set up a bit mask where only the bits are set to one for those taxa that are actually present in the small tree we just read */ /* note that I had to apply some small changes to this function to make it work for multi-furcating trees ! */ setupMask(smallTreeMask, smallTree->start, smallTree->mxtips); setupMask(smallTreeMask, smallTree->start->back, smallTree->mxtips); /* now get the index of the first taxon of the small tree. we will use this to unambiguously store the bipartitions */ firstTaxon = smallTree->start->number; /* make sure that this bit vector is set up correctly, i.e., that it contains as many non-zero bits as there are taxa in this small tree */ for(j = 0; j < vLength; j++) taxa += BIT_COUNT(smallTreeMask[j]); assert(taxa == smallTree->ntips); /* now re-hash the big tree by applying the above bit mask */ /* loop over hash table */ for(k = 0, entryCount = 0; k < h->tableSize; k++) { if(h->table[k] != NULL) { entry *e = h->table[k]; /* we resolve collisions by chaining, hence the loop here */ do { unsigned int *bitVector = e->bitVector; hashNumberType position; int count = 0; /* double check that our tree mask contains the first taxon of the small tree */ assert(smallTreeMask[(firstTaxon - 1) / MASK_LENGTH] & mask32[(firstTaxon - 1) % MASK_LENGTH]); /* if the first taxon is set then we will re-hash the bit-wise complement of the bit vector. The count variable is used for a small optimization */ if(bitVector[(firstTaxon - 1) / MASK_LENGTH] & mask32[(firstTaxon - 1) % MASK_LENGTH]) { //hash complement for(j = 0; j < vLength; j++) { masked[j] = (~bitVector[j]) & smallTreeMask[j]; count += BIT_COUNT(masked[j]); } } else { //hash this vector for(j = 0; j < vLength; j++) { masked[j] = bitVector[j] & smallTreeMask[j]; count += BIT_COUNT(masked[j]); } } /* note that padding the last bits is not required because they are set to 0 automatically by smallTreeMask */ /* make sure that we will re-hash the canonic representation of the bipartition where the bit for firstTaxon is set to 0! */ assert(!(masked[(firstTaxon - 1) / MASK_LENGTH] & mask32[(firstTaxon - 1) % MASK_LENGTH])); /* only if the masked bipartition of the large tree is a non-trivial bipartition (two or more bits set to 1 will we re-hash it */ if(count > 1) { /* compute hash */ position = oat_hash((unsigned char *)masked, sizeof(unsigned int) * vLength); position = position % rehash->tableSize; /* re-hash to the new hash table that contains the bips of the large tree, pruned down to the taxa contained in the small tree */ insertHashPlausibility(masked, rehash, vLength, position); } entryCount++; e = e->next; } while(e != NULL); } } /* make sure that we tried to re-hash all bipartitions of the original tree */ assert(entryCount == (unsigned int)(tr->mxtips - 3)); /* now traverse the small tree and count how many bipartitions it shares with the corresponding induced tree from the large tree */ /* the following function also had to be modified to account for multi-furcating trees ! */ bips = bitVectorTraversePlausibility(bitVectors, smallTree->start->back, smallTree->mxtips, vLength, rehash, &bCounter, firstTaxon, smallTree, TRUE); /* compute the relative RF */ rf = (double)(2 * (numberOfSplits - bips)) / maxRF; assert(numberOfSplits >= bips); assert(rf <= 1.0); avgRF += rf; if(numberOfTreesAnalyzed % 100 == 0) printBothOpen("Relative RF tree %d: %f\n\n", i, rf); fprintf(rfFile, "%d %f\n", i, rf); /* I also modified this assertion, we nee to make sure here that we checked all non-trivial splits/bipartitions in the multi-furcating tree whech can be less than n - 3 ! */ assert(bCounter == numberOfSplits); /* free masks and hast table for this iteration */ rax_free(smallTreeMask); rax_free(masked); freeHashTable(rehash); rax_free(rehash); numberOfTreesAnalyzed++; } } printBothOpen("Number of small trees skipped: %d\n\n", tr->numberOfTrees - numberOfTreesAnalyzed); printBothOpen("Average RF distance %f\n\n", avgRF / (double)numberOfTreesAnalyzed); printBothOpen("Total execution time: %f secs\n\n", gettime() - masterTime); printBothOpen("\nFile containing all %d pair-wise RF distances written to file %s\n\n", numberOfTreesAnalyzed, rfFileName); fclose(treeFile); fclose(rfFile); /* free the data structure used for parsing the potentially multi-furcating tree */ freeMultifurcations(smallTree); rax_free(smallTree); freeBitVectors(bitVectors, 2 * tr->mxtips); rax_free(bitVectors); freeHashTable(h); rax_free(h); }