/* Release a state object: frees its list and curSubsRates members and then
 * the object itself.  The argument must not be NULL (asserted). */
static void state_free(state *s)
{
  assert(s != NULL);

  rax_free(s->list);
  rax_free(s->curSubsRates);

  rax_free(s);
}
/* (Re)build the parsimony data of a PLL instance.
 *
 * Frees the per-partition parsVect arrays and tr->parsimonyScore, then calls
 * determineUninformativeSites() and compressDNA() with a temporary
 * per-site scratch array, flags the first record of every inner node
 * (numbers mxtips+1 .. 2*mxtips-1) with xPars = 1 and the other two with 0,
 * and finally allocates tr->ti with room for 4 * mxtips ints.
 *
 * tr            instance whose parsimony structures are initialised
 * pr            partition list belonging to tr
 * perSiteScores forwarded unchanged to compressDNA()
 */
void pllInitParsimonyStructures(pllInstance *tr, partitionList *pr, boolean perSiteScores)
{
  int
    node,
    part,
    *informative = (int *)rax_malloc(sizeof(int) * (size_t)tr->originalCrunchedLength);

  /* drop whatever parsimony data is currently attached */
  part = 0;
  while (part < pr->numberOfPartitions)
    {
      rax_free (pr->partitionData[part]->parsVect);
      ++ part;
    }
  rax_free (tr->parsimonyScore);

  determineUninformativeSites(tr, pr, informative);
  compressDNA(tr, pr, informative, perSiteScores);

  /* inner nodes consist of three linked records; only the first is flagged */
  for (node = tr->mxtips + 1; node <= tr->mxtips + tr->mxtips - 1; node++)
    {
      nodeptr first = tr->nodep[node];

      first->xPars             = 1;
      first->next->xPars       = 0;
      first->next->next->xPars = 0;
    }

  tr->ti = (int*)rax_malloc(sizeof(int) * 4 * (size_t)tr->mxtips);

  rax_free(informative);
}
/** @brief Deallocate the space associated with this structure
 *
 *  For every slot in the range [0, rl->max) the slot's \a connect array and
 *  the slot itself are freed; afterwards the slot array \a rl->t is freed.
 *  The list structure \a rl itself is not freed here.
 *
 *  @param rl
 *    This structure
 */
void freeTL(topolRELL_LIST *rl)
{
  int slot = 0;

  while (slot < rl->max)
    {
      rax_free(rl->t[slot]->connect);
      rax_free(rl->t[slot]);
      slot++;
    }

  rax_free(rl->t);
}
/* Free the parsimony data attached to a tree: tr->parsimonyScore, the
 * parsVect array of each of the tr->NumberOfModels partitions, and tr->ti. */
void pllFreeParsimonyDataStructures(tree *tr)
{
  size_t part = 0;

  rax_free(tr->parsimonyScore);

  while (part < (size_t) tr->NumberOfModels)
    {
      rax_free(tr->partitionData[part].parsVect);
      part++;
    }

  rax_free(tr->ti);
}
/* Free the node records of a tree and the node pointer array itself.
 *
 * Entries 1 .. (mxtips + 3 * (mxtips - 1) - 1) of tr->nodep are freed one by
 * one (entry 0 is not touched), followed by tr->nodep. */
void freeMultifurcations(tree *tr)
{
  const int tipCount   = tr->mxtips;
  const int innerCount = tr->mxtips - 1;
  const int limit      = tipCount + 3 * innerCount;
  int       idx        = 1;

  while (idx < limit)
    {
      rax_free(tr->nodep[idx]);
      idx++;
    }

  rax_free(tr->nodep);
}
/** @ingroup newickParseGroup
    @brief Deallocate newick parser stack structure

    Deallocates the newick parser stack structure that represents the parsed
    tree. Every element is popped from the stack and its \a name, its
    \a branch and the element itself are freed. Finally the wrapper structure
    is freed and the caller's pointer is reset to \b NULL.

    Passing \b NULL, or a pointer to a \b NULL tree (for instance the result
    of a failed parse), is a no-op.

    @param t
      Address of the pointer to the tree stack structure
*/
void pllNewickParseDestroy (pllNewickTree ** t)
{
  pllNewickNodeInfo * item;

  /* nothing to release, e.g. after a failed pllNewickParseString() */
  if (!t || !*t) return;

  while ((item = (pllNewickNodeInfo *)pllStackPop (&((*t)->tree))))
   {
     rax_free (item->name);
     rax_free (item->branch);
     rax_free (item);
   }

  rax_free (*t);
  (*t) = NULL;
}
/** @ingroup alignmentGroup @brief Parse a PHYLIP file Parses the PHYLIP file \a filename and returns a ::pllAlignmentData structure with the alignment. @param filename Name of file to be parsed @return Returns a structure of type ::pllAlignmentData that contains the alignment, or \b NULL in case of failure. */ static pllAlignmentData * pllParsePHYLIP (const char * filename) { int i, input, sequenceCount, sequenceLength; char * rawdata; long filesize; pllAlignmentData * alignmentData; rawdata = pllReadFile (filename, &filesize); if (!rawdata) { errno = PLL_ERROR_FILE_OPEN; return (NULL); } init_lexan (rawdata, filesize); input = get_next_symbol(); /* parse the header to obtain the number of taxa and sequence length */ if (!read_phylip_header (&input, &sequenceCount, &sequenceLength)) { rax_free (rawdata); fprintf (stderr, "Error while parsing PHYLIP header (number of taxa and sequence length)\n"); errno = PLL_ERROR_PHYLIP_HEADER_SYNTAX; return (NULL); } lex_table_amend_phylip(); /* allocate alignment structure */ alignmentData = pllInitAlignmentData (sequenceCount, sequenceLength); if (! parse_phylip (alignmentData, input)) { errno = PLL_ERROR_PHYLIP_BODY_SYNTAX; pllAlignmentDataDestroy (alignmentData); lex_table_restore(); rax_free (rawdata); return (NULL); } lex_table_restore(); rax_free (rawdata); alignmentData->siteWeights = (int *) rax_malloc (alignmentData->sequenceLength * sizeof (int)); for (i = 0; i < alignmentData->sequenceLength; ++ i) alignmentData->siteWeights[i] = 1; return (alignmentData); }
/* Release everything held by a best-tree list.
 *
 * Topologies byScore[ninit] down to byScore[0] are freed (ninit ends up at
 * -1), then the byScore and byTopol arrays, then the start topology.
 * The list structure itself is left to the caller.  Always returns PLL_TRUE. */
boolean freeBestTree(bestlist *bt)
{
  while (bt->ninit >= 0)
    {
      freeTopol(bt->byScore[(bt->ninit)--]);
    }

  /* the two index arrays */
  rax_free(bt->byScore);
  rax_free(bt->byTopol);

  freeTopol(bt->start);

  return PLL_TRUE;
}
/** @ingroup newickParseGroup @brief Parse a newick tree string Parse a newick string and create a stack structure which represents the tree in a preorder traversal form. Each element of the stack represents one node and consists of its name, branch length, number of children and depth. The stack structure is finally wrapped in a \a pllNewickTree structure which also contains the number of nodes and leaves. @param newick String containing the newick tree @return Returns a pointer to the created \a pllNewickTree structure in case of success, otherwise \b NULL */ pllNewickTree * pllNewickParseString (const char * newick) { int n, input, rc; pllNewickTree * t; int nodes, leaves; t = (pllNewickTree *) calloc (1, sizeof (pllNewickTree)); n = strlen (newick); init_lexan (newick, n); input = get_next_symbol(); rc = parse_newick (&(t->tree), &input); if (!rc) { /* TODO: properly clean t->tree */ rax_free (t); t = NULL; } else { assign_ranks (t->tree, &nodes, &leaves); t->nodes = nodes; t->tips = leaves; } return (t); }
/* Free the bit vectors v[1] .. v[n-1].
 * Slot 0 is deliberately left alone, and the pointer array v itself is not
 * freed by this function. */
void freeBitVectors(unsigned int **v, int n)
{
  int slot = 1;

  while (slot < n)
    {
      rax_free(v[slot]);
      ++slot;
    }
}
/* Run one thorough rearrangement pass over all nodes 1 .. 2*mxtips-2.
 *
 * For every node rearrangeBIG() is invoked with the traversal radius
 * [mintrav, maxtrav] (maxtrav is capped at ntips - 3).  If a candidate was
 * recorded (tr->bestOfNode != unlikely) it is restored and locally smoothed;
 * when the candidate did not beat the starting likelihood according to
 * endLH, the tree is saved first and recalled again if the smoothed result
 * is worse than tr->startLH.
 *
 * Always returns 1. */
int treeOptimizeThorough(tree *tr, int mintrav, int maxtrav)
{
  int       node;
  bestlist *saved;

  nodeRectifier(tr);

  saved = (bestlist *) rax_malloc(sizeof(bestlist));
  saved->ninit = 0;
  initBestTree(saved, 1, tr->mxtips);

  if (maxtrav > tr->ntips - 3)
    maxtrav = tr->ntips - 3;

  tr->startLH = tr->endLH = tr->likelihood;

  for (node = 1; node <= tr->mxtips + tr->mxtips - 2; node++)
    {
      tr->bestOfNode = unlikely;

      if (!rearrangeBIG(tr, tr->nodep[node], mintrav, maxtrav))
        continue;

      /* no candidate recorded for this node: nothing to apply */
      if (tr->bestOfNode == unlikely)
        continue;

      if (tr->endLH > tr->startLH)
        {
          /* clear improvement: accept it */
          restoreTreeFast(tr);
          quickSmoothLocal(tr, 3);
          tr->startLH = tr->endLH = tr->likelihood;
        }
      else
        {
          /* tentative move: keep a backup so it can be undone */
          resetBestTree(saved);
          saveBestTree(saved, tr);
          restoreTreeFast(tr);
          quickSmoothLocal(tr, 3);

          if (tr->likelihood < tr->startLH)
            {
              int res;

              res = recallBestTree(saved, 1, tr);
              assert(res > 0);
            }
          else
            tr->startLH = tr->endLH = tr->likelihood;
        }
    }

  freeBestTree(saved);
  rax_free(saved);

  return 1;
}
/* Free every entry chained in the hash table together with the vectors it
 * carries (bitVector, treeVector, supportVector), then the bucket array.
 * The number of freed entries is asserted to equal h->entryCount.
 * The hashtable structure h itself is not freed. */
void freeHashTable(hashtable *h)
{
  hashNumberType
    bucket,
    freed = 0;

  for (bucket = 0; bucket < h->tableSize; bucket++)
    {
      entry *cursor = h->table[bucket];

      while (cursor != NULL)
        {
          entry *victim = cursor;

          /* advance before the current entry is released */
          cursor = cursor->next;

          if (victim->bitVector)
            rax_free(victim->bitVector);

          if (victim->treeVector)
            rax_free(victim->treeVector);

          if (victim->supportVector)
            rax_free(victim->supportVector);

          rax_free(victim);
          freed++;
        }
    }

  assert(freed == h->entryCount);

  rax_free(h->table);
}
/* Per-site log likelihood contribution for the 16-state secondary structure
 * model under CAT.
 *
 * i                site index into the tip sequences
 * ki               rate multiplier applied to the log branch length
 * counter          number of entries in the traversal list ti
 * ti               traversal list; ti[0] describes the evaluated branch and
 *                  its pNumber must be a tip (asserted)
 * qz               branch length of the evaluated branch; values below zmin
 *                  are clamped to zmin before the logarithm is taken
 * w                weight the result is multiplied with
 * EIGN, EI, EV     model arrays, forwarded to computeVectorGTRCATSECONDARY();
 *                  EIGN[l-1] is used for state l
 * tipVector        tip likelihood table, 16 doubles per tip state
 * yVector          tip sequences, indexed [node number][site]
 * branchReference  index selecting the branch length set in ti[k].qz / .rz
 * mxtips           number of tips
 *
 * Returns w * (log|sum_l x1[l] * x2[l] * d[l]| + scale * log(minlikelihood)).
 */
static double evaluatePartialGTRCATSECONDARY(int i, double ki, int counter, traversalInfo *ti, double qz, int w, double *EIGN, double *EI, double *EV, double *tipVector, unsigned char **yVector, int branchReference, int mxtips)
{
  double lz, term;
  double d[16];
  double *x1, *x2;
  int scale = 0, k, l;
  double *lVector = (double *)rax_malloc(sizeof(double) * 16 * mxtips);

  traversalInfo *trav = &ti[0];

  assert(isTip(trav->pNumber, mxtips));

  x1 = &(tipVector[16 * yVector[trav->pNumber][i]]);

  /* recompute the inner vectors listed in the traversal descriptor */
  for(k = 1; k < counter; k++)
    computeVectorGTRCATSECONDARY(lVector, &scale, ki, i, ti[k].qz[branchReference], ti[k].rz[branchReference],
                                 &ti[k], EIGN, EI, EV,
                                 tipVector, yVector, mxtips);

  x2 = &lVector[16 * (trav->qNumber - mxtips)];

  assert(0 <= (trav->qNumber - mxtips) && (trav->qNumber - mxtips) < mxtips);

  /* Clamp the branch length BEFORE taking the logarithm.  The previous code
     assigned zmin to lz and then unconditionally overwrote it with log(qz),
     so the clamp never took effect. */
  lz  = log((qz < zmin) ? zmin : qz);
  lz *= ki;

  d[0] = 1.0;
  for(l = 1; l < 16; l++)
    d[l] = EXP (EIGN[l-1] * lz);

  term = 0.0;
  for(l = 0; l < 16; l++)
    term += x1[l] * x2[l] * d[l];

  term = LOG(FABS(term)) + (scale * LOG(minlikelihood));
  term = term * w;

  rax_free(lVector);

  return term;
}
/* Re-order the inner nodes of the tree for parsimony.
 *
 * Sets tr->start to tip 1 and marks the tree as unrooted, copies the inner
 * node pointers (mxtips+1 .. 2*mxtips-1) into a scratch array and hands that
 * array to reorderNodes(), starting at tr->start->back. */
static void nodeRectifierPars(tree *tr)
{
  nodeptr *scratch = (nodeptr *)malloc(2 * tr->mxtips * sizeof(nodeptr));
  int visited = 0;
  int node;

  tr->start  = tr->nodep[1];
  tr->rooted = FALSE;   /* TODO why is tr->rooted set to FALSE here ?*/

  node = tr->mxtips + 1;
  while (node <= (tr->mxtips + tr->mxtips - 1))
    {
      scratch[node] = tr->nodep[node];
      node++;
    }

  reorderNodes(tr, scratch, tr->start->back, &visited);

  rax_free(scratch);
}
/* Free the model parameter arrays (EIGN, EV, EI, substRates, frequencies,
 * tipVector) of the first numberOfModels entries of partBuffer.
 * The partBuffer array itself is not freed. */
static void rax_freeParams(int numberOfModels, pInfo *partBuffer)
{
  int model = 0;

  while (model < numberOfModels)
    {
      pInfo *part = &partBuffer[model];

      rax_free(part->EIGN);
      rax_free(part->EV);
      rax_free(part->EI);
      rax_free(part->substRates);
      rax_free(part->frequencies);
      rax_free(part->tipVector);

      model++;
    }
}
/** @ingroup alignmentGroup
    @brief Deallocates the memory associated with the alignment data structure

    Deallocates the memory associated with the alignment data structure
    \a alignmentData: the sequence labels 1 .. sequenceCount, the block
    referenced by sequenceData[1], the label and sequence pointer arrays, the
    site weights and finally the structure itself.

    Passing \b NULL (for instance the result of a failed parse) is a no-op.

    @param alignmentData
      The alignment data structure
*/
void pllAlignmentDataDestroy (pllAlignmentData * alignmentData)
{
  int i;

  /* tolerate cleanup paths that run after a failed parse */
  if (!alignmentData) return;

  for (i = 1; i <= alignmentData->sequenceCount; ++ i)
   {
     rax_free (alignmentData->sequenceLabels[i]);
   }

  rax_free (alignmentData->sequenceData[1]);
  rax_free (alignmentData->sequenceLabels);
  rax_free (alignmentData->sequenceData);
  rax_free (alignmentData->siteWeights);
  rax_free (alignmentData);
}
/* Fill diagptable with the diagonal P-matrix entries for one branch.
 *
 * z                  branch length; values below zmin are replaced by zmin
 *                    before the logarithm is taken
 * states             number of states of the model
 * numberOfCategories number of rate categories
 * rptr               rate of each category
 * EIGN               eigenvalues, indexed by state
 * diagptable         pre-allocated output of numberOfCategories * states
 *                    doubles; entry [c * states + l] receives
 *                    EXP(rptr[c] * EIGN[l] * log(z)), entry l == 0 is 1.0
 */
static void calcDiagptable(const double z, const int states, const int numberOfCategories, const double *rptr, const double *EIGN, double *diagptable)
{
  double *scaledEigen = (double *)malloc(sizeof(double) * states);
  /* log of the (clamped) branch length */
  const double logBranch = log((z < zmin) ? zmin : z);
  int cat, s;

  /* pre-multiply once instead of inside the category loop */
  for (s = 0; s < states; s++)
    scaledEigen[s] = EIGN[s] * logBranch;

  for (cat = 0; cat < numberOfCategories; cat++)
    {
      double *row = diagptable + cat * states;

      /* the first entry of every row is always 1.0 */
      row[0] = 1.0;

      for (s = 1; s < states; s++)
        row[s] = EXP(rptr[cat] * scaledEigen[s]);
    }

  rax_free(scaledEigen);
}
/** @ingroup newickParseGroup
    @brief Parse a newick tree file

    Reads the whole file \a filename into memory and passes its contents to
    pllNewickParseString(), which builds the stack structure representing the
    tree wrapped in a \a pllNewickTree. The file buffer is released before
    returning. If the file cannot be read an error message is written to
    stderr.

    @param filename
      Filename containing the newick tree

    @return
      Returns a pointer to the created \a pllNewickTree structure in case of
      success, otherwise \b NULL
*/
pllNewickTree * pllNewickParseFile (const char * filename)
{
  long           length;
  pllNewickTree *parsed;
  char          *contents = pllReadFile (filename, &length);

  if (contents == NULL)
   {
     fprintf (stderr, "Error while opening/reading file %s\n", filename);
     return (NULL);
   }

  parsed = pllNewickParseString (contents);

  rax_free (contents);

  return (parsed);
}
/* Set up the parsimony data of a tree.
 *
 * Calls determineUninformativeSites() and compressDNA() with a temporary
 * per-site scratch array, flags the first record of every inner node
 * (numbers mxtips+1 .. 2*mxtips-1) with xPars = 1 and the other two with 0,
 * and allocates tr->ti with room for 4 * mxtips ints. */
void allocateParsimonyDataStructures(tree *tr)
{
  int
    node,
    *informative = (int *)malloc(sizeof(int) * (size_t)tr->originalCrunchedLength);

  determineUninformativeSites(tr, informative);
  compressDNA(tr, informative);

  /* inner nodes consist of three linked records; only the first is flagged */
  node = tr->mxtips + 1;
  while (node <= tr->mxtips + tr->mxtips - 1)
    {
      nodeptr first = tr->nodep[node];

      first->xPars             = 1;
      first->next->xPars       = 0;
      first->next->next->xPars = 0;

      node++;
    }

  tr->ti = (int*)malloc(sizeof(int) * 4 * (size_t)tr->mxtips);

  rax_free(informative);
}
/* Strip one of the two marker bits from every entry and drop entries that
 * end up with no marker at all.
 *
 * state == 0 keeps bit value 2 of treeVector[0], state == 1 keeps bit value
 * 1; any other state is rejected by assertion.  Entries whose treeVector[0]
 * becomes 0 are unlinked from their bucket chain and freed together with
 * their bitVector, treeVector and supportVector.  The number of visited
 * entries is asserted to equal h->entryCount, which is then reduced by the
 * number of removed entries. */
void cleanupHashTable(hashtable *h, int state)
{
  hashNumberType
    bucket,
    visited = 0,
    dropped = 0;
  int keepBit, clearedBit;

  assert(state == 1 || state == 0);

  keepBit    = (state == 0) ? 2 : 1;
  clearedBit = (state == 0) ? 1 : 2;

  for (bucket = 0, visited = 0; bucket < h->tableSize; bucket++)
    {
      entry *cur, *head, *tail;

      if (h->table[bucket] == NULL)
        continue;

      cur  = h->table[bucket];
      head = (entry*)NULL;     /* first surviving entry of this chain */
      tail = (entry*)NULL;     /* last surviving entry seen so far    */

      while (cur != NULL)
        {
          cur->treeVector[0] = cur->treeVector[0] & keepBit;
          assert(!(cur->treeVector[0] & clearedBit));

          if (cur->treeVector[0] != 0)
            {
              if (!head)
                head = cur;
              tail = cur;
              cur  = cur->next;
            }
          else
            {
              entry *dead = cur;

              cur = cur->next;
              dropped++;

              /* bridge the gap left in the chain */
              if (tail)
                tail->next = dead->next;

              if (dead->bitVector)
                rax_free(dead->bitVector);
              if (dead->treeVector)
                rax_free(dead->treeVector);
              if (dead->supportVector)
                rax_free(dead->supportVector);
              rax_free(dead);
            }

          visited++;
        }

      if (!head)
        assert(!tail);

      /* head is NULL when every entry of the bucket was removed */
      h->table[bucket] = head;
    }

  assert(visited == h->entryCount);

  h->entryCount -= dropped;
}
/* Compute the log likelihood of the tree at the branch between p and p->back
 * and store it in tr->likelihood.
 *
 * The first traversal descriptor entry is filled with the node numbers and
 * branch lengths of that branch.  With fullTraversal set, p must be a tip
 * (asserted) and the traversal is computed from q = p->back; otherwise the
 * traversal is only extended for those of p and q whose x flag is not set.
 * After evaluateIterative() the per-partition values in tr->perPartitionLH
 * are summed over MPI (reduce to rank 0, then broadcast), written back to
 * tr->perPartitionLH and added up into the total likelihood.
 */
void evaluateGeneric (tree *tr, nodeptr p, boolean fullTraversal)
{
  volatile double
    result = 0.0;

  nodeptr
    q = p->back;

  int
    i,
    model;

  double
    *recv;

  /* entry 0 of the traversal descriptor describes the virtual root branch */
  tr->td[0].ti[0].pNumber = p->number;
  tr->td[0].ti[0].qNumber = q->number;

  /* copy the branch lengths of that branch into the descriptor */
  i = 0;
  while (i < tr->numBranches)
    {
      tr->td[0].ti[0].qz[i] = q->z[i];
      i++;
    }

  /* one entry in the traversal descriptor is already used */
  tr->td[0].count = 1;

  if (fullTraversal)
    {
      assert(isTip(p->number, tr->mxtips));
      computeTraversalInfo(q, &(tr->td[0].ti[0]), &(tr->td[0].count), tr->mxtips, tr->numBranches, FALSE);
    }
  else
    {
      /* only extend the traversal where the x flag is not set */
      if (!p->x)
        computeTraversalInfo(p, &(tr->td[0].ti[0]), &(tr->td[0].count), tr->mxtips, tr->numBranches, TRUE);

      if (!q->x)
        computeTraversalInfo(q, &(tr->td[0].ti[0]), &(tr->td[0].count), tr->mxtips, tr->numBranches, TRUE);
    }

  /* copy the partition execute mask into the traversal descriptor */
  storeExecuteMaskInTraversalDescriptor(tr);

  /* flag the descriptor as changed for the duration of the evaluation */
  tr->td[0].traversalHasChanged = TRUE;

  evaluateIterative(tr);

  recv = (double *)malloc(sizeof(double) * tr->NumberOfModels);

  if (0)
    {
      /* disabled alternative: a single all-reduce, tr->perPartitionLH untouched */
      MPI_Allreduce(tr->perPartitionLH, recv, tr->NumberOfModels, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);

      for (model = 0; model < tr->NumberOfModels; model++)
        {
          /* TODO ??? */
          result += recv[model];
        }
    }
  else
    {
      MPI_Reduce(tr->perPartitionLH, recv, tr->NumberOfModels, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
      MPI_Bcast(recv, tr->NumberOfModels, MPI_DOUBLE, 0, MPI_COMM_WORLD);

      for (model = 0; model < tr->NumberOfModels; model++)
        {
          /* TODO ??? */
          tr->perPartitionLH[model] = recv[model];
          result += recv[model];
        }
    }

  rax_free(recv);

  /* set the tree data structure likelihood value to the total likelihood */
  tr->likelihood = result;

  /* leave traversalHasChanged in a consistent state */
  tr->td[0].traversalHasChanged = FALSE;
}
/* Apply the column ranges listed in the exclude file and write reduced copies
 * of the input files.
 *
 * The file named by excludeFileName must have the format
 * "number-number [number-number]*" (1-based, inclusive column ranges,
 * separated by white space).  Malformed input, a lower bound of 0, a lower
 * bound above the upper bound, an upper bound above rdta->sites, or the
 * exclusion of all columns terminates the program.  Overlapping ranges only
 * produce a warning.
 *
 * Output files, each named "<original file>.<excludeFileName>":
 *   - if adef->useSecondaryStructure and columns were excluded: the secondary
 *     structure string restricted to the retained columns
 *   - if adef->useMultipleModel and columns were excluded: a partition file
 *     whose ranges are renumbered to the retained columns; partitions that
 *     lost all their columns are omitted
 *   - always: the alignment restricted to the retained columns
 *
 * tr    tree; model assignment, partition data, taxon names and data types
 *       are read from it
 * adef  analysis settings (useSecondaryStructure, useMultipleModel)
 * rdta  raw alignment (number of sites and the sequences)
 */
void handleExcludeFile(tree *tr, analdef *adef, rawdata *rdta)
{
  FILE *f;
  char buf[256];
  int
    ch,
    j, value, i,
    state = 0,
    numberOfModels = 0,
    l = -1,
    excludeRegion   = 0,
    excludedColumns = 0,
    modelCounter    = 1;
  int
    *excludeArray, *countArray, *modelList;
  int
    **partitions;

  printf("\n\n");

  f = myfopen(excludeFileName, "rb");

  /* first pass: every range contains exactly one '-' */
  while((ch = getc(f)) != EOF)
    {
      if(ch == '-')
        numberOfModels++;
    }

  /* all three arrays are indexed by 1-based column number, slot 0 is unused */
  excludeArray = (int*)rax_malloc(sizeof(int) * (rdta->sites + 1));
  countArray   = (int*)rax_malloc(sizeof(int) * (rdta->sites + 1));
  modelList    = (int *)rax_malloc((rdta->sites + 1)* sizeof(int));

  partitions = (int **)rax_malloc(sizeof(int *) * numberOfModels);
  for(i = 0; i < numberOfModels; i++)
    partitions[i] = (int *)rax_malloc(sizeof(int) * 2);

  rewind(f);

  /* second pass: small state machine
     0 = before a range, 1 = inside lower bound,
     2 = directly after '-', 3 = inside upper bound */
  while((ch = getc(f)) != EOF)
    {
      switch(state)
        {
        case 0: /* get first number */
          if(!whitechar(ch))
            {
              if(!isNum(ch))
                {
                  printf("exclude file must have format: number-number [number-number]*\n");
                  exit(-1);
                }
              l = 0;
              buf[l++] = ch;
              state = 1;
            }
          break;
        case 1: /*get the number or detect - */
          if(!isNum(ch) && ch != '-')
            {
              printf("exclude file must have format: number-number [number-number]*\n");
              exit(-1);
            }
          if(isNum(ch))
            {
              /* keep one byte for the terminating '\0' */
              if(l >= (int)sizeof(buf) - 1)
                {
                  printf("exclude file contains a number with too many digits\n");
                  exit(-1);
                }
              buf[l++] = ch;
            }
          else
            {
              buf[l++] = '\0';
              value = atoi(buf);
              partitions[excludeRegion][0] = value;
              state = 2;
            }
          break;
        case 2: /*get second number */
          if(!isNum(ch))
            {
              printf("exclude file must have format: number-number [number-number]*\n");
              exit(-1);
            }
          l = 0;
          buf[l++] = ch;
          state = 3;
          break;
        case 3: /* continue second number or find end */
          if(!isNum(ch) && !whitechar(ch))
            {
              printf("exclude file must have format: number-number [number-number]*\n");
              exit(-1);
            }
          if(isNum(ch))
            {
              /* keep one byte for the terminating '\0' */
              if(l >= (int)sizeof(buf) - 1)
                {
                  printf("exclude file contains a number with too many digits\n");
                  exit(-1);
                }
              buf[l++] = ch;
            }
          else
            {
              buf[l++] = '\0';
              value = atoi(buf);
              partitions[excludeRegion][1] = value;
              excludeRegion++;
              state = 0;
            }
          break;
        default:
          assert(0);
        }
    }

  /* the file ended inside the upper bound of the last range */
  if(state == 3)
    {
      buf[l++] = '\0';
      value = atoi(buf);
      partitions[excludeRegion][1] = value;
      excludeRegion++;
    }

  assert(excludeRegion == numberOfModels);

  for(i = 0; i <= rdta->sites; i++)
    {
      excludeArray[i] = -1;
      countArray[i]   = 0;
      modelList[i]    = -1;
    }

  /* validate every range and mark its columns as excluded */
  for(i = 0; i < numberOfModels; i++)
    {
      int lower = partitions[i][0];
      int upper = partitions[i][1];

      if(lower > upper)
        {
          printf("Misspecified exclude region %d\n", i);
          printf("lower bound %d is greater than upper bound %d\n", lower, upper);
          exit(-1);
        }

      if(lower == 0)
        {
          printf("Misspecified exclude region %d\n", i);
          printf("lower bound must be greater than 0\n");
          exit(-1);
        }

      if(upper > rdta->sites)
        {
          printf("Misspecified exclude region %d\n", i);
          printf("upper bound %d must be smaller than %d\n", upper, (rdta->sites + 1));
          exit(-1);
        }

      for(j = lower; j <= upper; j++)
        {
          if(excludeArray[j] != -1)
            {
              printf("WARNING: Exclude regions %d and %d overlap at position %d (already excluded %d times)\n",
                     excludeArray[j], i, j, countArray[j]);
            }
          excludeArray[j] = i;
          countArray[j]   = countArray[j] + 1;
        }
    }

  /* modelList[1..] receives the model of every retained column, packed */
  for(i = 1; i <= rdta->sites; i++)
    {
      if(excludeArray[i] != -1)
        excludedColumns++;
      else
        {
          modelList[modelCounter] = tr->model[i];
          modelCounter++;
        }
    }

  printf("You have excluded %d out of %d columns\n", excludedColumns, rdta->sites);

  if(excludedColumns == rdta->sites)
    {
      printf("Error: You have excluded all sites\n");
      exit(-1);
    }

  if(adef->useSecondaryStructure && (excludedColumns > 0))
    {
      char mfn[2048];
      int countColumns;
      FILE *newFile;

      assert(adef->useMultipleModel);

      strcpy(mfn, secondaryStructureFileName);
      strcat(mfn, ".");
      strcat(mfn, excludeFileName);

      newFile = myfopen(mfn, "wb");

      printBothOpen("\nA secondary structure file with analogous structure assignments for non-excluded columns is printed to file %s\n", mfn);

      for(i = 1, countColumns = 0; i <= rdta->sites; i++)
        {
          if(excludeArray[i] == -1)
            fprintf(newFile, "%c", tr->secondaryStructureInput[i - 1]);
          else
            countColumns++;
        }

      assert(countColumns == excludedColumns);

      fprintf(newFile,"\n");

      fclose(newFile);
    }

  if(adef->useMultipleModel && (excludedColumns > 0))
    {
      char mfn[2048];
      FILE *newFile;

      strcpy(mfn, modelFileName);
      strcat(mfn, ".");
      strcat(mfn, excludeFileName);

      newFile = myfopen(mfn, "wb");

      printf("\nA partition file with analogous model assignments for non-excluded columns is printed to file %s\n", mfn);

      for(i = 0; i < tr->NumberOfModels; i++)
        {
          boolean modelStillExists = FALSE;

          for(j = 1; (j <= rdta->sites) && (!modelStillExists); j++)
            {
              if(modelList[j] == i)
                modelStillExists = TRUE;
            }

          if(modelStillExists)
            {
              int k = 1;
              int lower, upper;
              int parts = 0;

              switch(tr->partitionData[i].dataType)
                {
                case AA_DATA:
                  {
                    char AAmodel[1024];

                    if(tr->partitionData[i].ascBias)
                      {
                        strcpy(AAmodel, "ASC_");
                        strcat(AAmodel, protModels[tr->partitionData[i].protModels]);
                      }
                    else
                      strcpy(AAmodel, protModels[tr->partitionData[i].protModels]);

                    if(tr->partitionData[i].usePredefinedProtFreqs == FALSE)
                      strcat(AAmodel, "F");

                    if(tr->partitionData[i].optimizeBaseFrequencies)
                      strcat(AAmodel, "X");

                    assert(!(tr->partitionData[i].optimizeBaseFrequencies && tr->partitionData[i].usePredefinedProtFreqs));

                    fprintf(newFile, "%s, ", AAmodel);
                  }
                  break;
                case DNA_DATA:
                  if(tr->partitionData[i].optimizeBaseFrequencies)
                    {
                      if(tr->partitionData[i].ascBias)
                        fprintf(newFile, "ASC_DNAX, ");
                      else
                        fprintf(newFile, "DNAX, ");
                    }
                  else
                    {
                      if(tr->partitionData[i].ascBias)
                        fprintf(newFile, "ASC_DNA, ");
                      else
                        fprintf(newFile, "DNA, ");
                    }
                  break;
                case BINARY_DATA:
                  if(tr->partitionData[i].optimizeBaseFrequencies)
                    {
                      if(tr->partitionData[i].ascBias)
                        fprintf(newFile, "ASC_BINX, ");
                      else
                        fprintf(newFile, "BINX, ");
                    }
                  else
                    {
                      if(tr->partitionData[i].ascBias)
                        fprintf(newFile, "ASC_BIN, ");
                      else
                        fprintf(newFile, "BIN, ");
                    }
                  break;
                case GENERIC_32:
                  if(tr->partitionData[i].optimizeBaseFrequencies)
                    {
                      if(tr->partitionData[i].ascBias)
                        fprintf(newFile, "ASC_MULTIX, ");
                      else
                        fprintf(newFile, "MULTIX, ");
                    }
                  else
                    {
                      if(tr->partitionData[i].ascBias)
                        fprintf(newFile, "ASC_MULTI, ");
                      else
                        fprintf(newFile, "MULTI, ");
                    }
                  break;
                case GENERIC_64:
                  if(tr->partitionData[i].optimizeBaseFrequencies)
                    {
                      if(tr->partitionData[i].ascBias)
                        fprintf(newFile, "ASC_CODONX, ");
                      else
                        fprintf(newFile, "CODONX, ");
                    }
                  else
                    {
                      if(tr->partitionData[i].ascBias)
                        fprintf(newFile, "ASC_CODON, ");
                      else
                        fprintf(newFile, "CODON, ");
                    }
                  break;
                default:
                  assert(0);
                }

              fprintf(newFile, "%s = ", tr->partitionData[i].partitionName);

              /* emit maximal runs of consecutive columns belonging to model i */
              while(k <= rdta->sites)
                {
                  if(modelList[k] == i)
                    {
                      lower = k;

                      /* Test the bound first: modelList has rdta->sites + 1
                         elements, so modelList[k + 1] may only be read while
                         k < rdta->sites. */
                      while((k < rdta->sites) && (modelList[k + 1] == i))
                        k++;

                      upper = k;

                      if(lower == upper)
                        {
                          if(parts == 0)
                            fprintf(newFile, "%d", lower);
                          else
                            fprintf(newFile, ",%d", lower);
                        }
                      else
                        {
                          if(parts == 0)
                            fprintf(newFile, "%d-%d", lower, upper);
                          else
                            fprintf(newFile, ",%d-%d", lower, upper);
                        }
                      parts++;
                    }
                  k++;
                }

              fprintf(newFile, "\n");
            }
        }

      fclose(newFile);
    }

  {
    FILE *newFile;
    char mfn[2048];

    strcpy(mfn, seq_file);
    strcat(mfn, ".");
    strcat(mfn, excludeFileName);

    newFile = myfopen(mfn, "wb");

    printf("\nAn alignment file with excluded columns is printed to file %s\n\n\n", mfn);

    fprintf(newFile, "%d %d\n", tr->mxtips, rdta->sites - excludedColumns);

    for(i = 1; i <= tr->mxtips; i++)
      {
        unsigned char *tipI = &(rdta->y[i][1]);

        fprintf(newFile, "%s ", tr->nameList[i]);

        for(j = 0; j < rdta->sites; j++)
          {
            if(excludeArray[j + 1] == -1)
              fprintf(newFile, "%c", getInverseMeaning(tr->dataVector[j + 1], tipI[j]));
          }

        fprintf(newFile, "\n");
      }

    fclose(newFile);
  }

  fclose(f);

  for(i = 0; i < numberOfModels; i++)
    rax_free(partitions[i]);
  rax_free(partitions);
  rax_free(excludeArray);
  rax_free(countArray);
  rax_free(modelList);
}
/* Per-site log likelihood contribution under CAT for an arbitrary number of
 * states.
 *
 * i                site index into the tip sequences
 * ki               rate multiplier applied to the log branch length
 * counter          number of entries in the traversal list ti
 * ti               traversal list; ti[0] describes the evaluated branch and
 *                  its pNumber must be a tip (asserted)
 * qz               branch length of the evaluated branch; values below zmin
 *                  are clamped to zmin before the logarithm is taken
 * w                weight the result is multiplied with
 * EIGN, EI, EV     model arrays, forwarded to computeVectorCAT_FLEX();
 *                  EIGN[k] is used for state k
 * tipVector        tip likelihood table, `states` doubles per tip state
 * yVector          tip sequences, indexed [node number][site]
 * branchReference  index selecting the branch length set in ti[k].qz / .rz
 * mxtips           number of tips
 * states           number of states of the model
 *
 * Returns w * (log|sum_k x1[k] * x2[k] * d[k]| + scale * log(minlikelihood)).
 */
static double evaluatePartialCAT_FLEX(int i, double ki, int counter, traversalInfo *ti, double qz, int w, double *EIGN, double *EI, double *EV, double *tipVector, unsigned char **yVector, int branchReference, int mxtips, const int states)
{
  int
    scale = 0,
    k;

  double
    *lVector = (double *)malloc_aligned(sizeof(double) * states * mxtips),
    *d = (double *)malloc_aligned(sizeof(double) * states),
    lz,
    term,
    *x1,
    *x2;

  traversalInfo
    *trav = &ti[0];

  assert(isTip(trav->pNumber, mxtips));

  x1 = &(tipVector[states * yVector[trav->pNumber][i]]);

  /* recompute the inner vectors listed in the traversal descriptor; the
     locals are named so that they no longer shadow the qz parameter */
  for(k = 1; k < counter; k++)
    {
      double
        logQz = ti[k].qz[branchReference],
        logRz = ti[k].rz[branchReference];

      logQz = (logQz > zmin) ? log(logQz) : log(zmin);
      logRz = (logRz > zmin) ? log(logRz) : log(zmin);

      computeVectorCAT_FLEX(lVector, &scale, ki, i, logQz, logRz, &ti[k],
                            EIGN, EI, EV,
                            tipVector, yVector, mxtips, states);
    }

  x2 = &lVector[states * (trav->qNumber - mxtips)];

  assert(0 <= (trav->qNumber - mxtips) && (trav->qNumber - mxtips) < mxtips);

  /* Clamp the branch length BEFORE taking the logarithm.  The previous code
     assigned zmin to lz and then unconditionally overwrote it with log(qz),
     so the clamp never took effect. */
  lz  = log((qz < zmin) ? zmin : qz);
  lz *= ki;

  d[0] = 1.0;
  for(k = 1; k < states; k++)
    d[k] = EXP (EIGN[k] * lz);

  term = 0.0;
  for(k = 0; k < states; k++)
    term += x1[k] * x2[k] * d[k];

  term = LOG(FABS(term)) + (scale * LOG(minlikelihood));
  term = term * w;

  rax_free(lVector);
  rax_free(d);

  return term;
}
/* Read the secondary structure file and register the paired columns as an
 * additional partition.
 *
 * Nothing happens unless adef->useSecondaryStructure is set.  The file named
 * by secondaryStructureFileName may contain the bracket characters
 * ( ) < > [ ] { }, the character '.' and white space; it must describe
 * exactly `sites` columns.  Any other character, a length mismatch, a bracket
 * on a column whose data type is AA, binary, 32- or 64-state, or unbalanced
 * brackets terminate the program (via errorExit() or a failed assertion).
 *
 * Effects when at least one bracket pair is present:
 *   - tr->extendedDataVector[] of every paired column is set to the
 *     secondary data type selected by tr->secondaryStructureModel
 *   - tr->model[] of every paired column is set to a new model index
 *   - tr->extendedPartitionData is re-allocated with one extra partition
 *     describing the secondary structure model; tr->NumberOfModels grows by 1
 *   - with adef->perGeneBranchLengths, tr->multiBranch / tr->numBranches are
 *     updated (the program exits if NUM_BRANCHES is too small)
 *   - tr->secondaryStructurePairs[c] holds the 0-based partner column of
 *     column c, or -1 for unpaired columns
 *   - adef->useMultipleModel is set to TRUE
 * tr->secondaryStructureInput (raw characters) and
 * tr->numberOfSecondaryColumns are set in any case.
 *
 * tr     tree to annotate
 * adef   analysis settings
 * sites  number of alignment columns
 */
void parseSecondaryStructure(tree *tr, analdef *adef, int sites)
{
  if(adef->useSecondaryStructure)
    {
      FILE *f = myfopen(secondaryStructureFileName, "rb");

      int
        i,
        k,
        countCharacters = 0,
        ch,
        *characters,
        **brackets,
        opening,
        closing,
        depth,
        numberOfSymbols,
        numSecondaryColumns;

      unsigned char bracketTypes[4][2] = {{'(', ')'}, {'<', '>'}, {'[', ']'}, {'{', '}'}};

      numberOfSymbols = 4;

      tr->secondaryStructureInput = (char*)rax_malloc(sizeof(char) * sites);

      /* first pass: validate the alphabet and count the structure characters */
      while((ch = fgetc(f)) != EOF)
        {
          if(ch == '(' || ch == ')' || ch == '<' || ch == '>' || ch == '[' || ch == ']' || ch == '{' || ch == '}' || ch == '.')
            countCharacters++;
          else
            {
              if(!whitechar(ch))
                {
                  printf("Secondary Structure file %s contains character %c at position %d\n", secondaryStructureFileName, ch, countCharacters + 1);
                  printf("Allowed Characters are \"( ) < > [ ] { } \" and \".\" \n");
                  errorExit(-1);
                }
            }
        }

      if(countCharacters != sites)
        {
          printf("Error: Alignment length is: %d, secondary structure file has length %d\n", sites, countCharacters);
          errorExit(-1);
        }

      characters = (int*)rax_malloc(sizeof(int) * countCharacters);

      /* brackets[k][c] holds the nesting depth of bracket type k at column c,
         0 where column c carries no bracket of that type */
      brackets = (int **)rax_malloc(sizeof(int*) * numberOfSymbols);

      for(k = 0; k < numberOfSymbols; k++)
        brackets[k] = (int*)rax_calloc(countCharacters, sizeof(int));

      rewind(f);
      countCharacters = 0;

      /* second pass: store the characters */
      while((ch = fgetc(f)) != EOF)
        {
          if(!whitechar(ch))
            {
              tr->secondaryStructureInput[countCharacters] = ch;
              characters[countCharacters++] = ch;
            }
        }

      assert(countCharacters == sites);

      /* check every bracket type separately and record nesting depths */
      for(k = 0; k < numberOfSymbols; k++)
        {
          for(i = 0, opening = 0, closing = 0, depth = 0; i < countCharacters; i++)
            {
              if((characters[i] == bracketTypes[k][0] || characters[i] == bracketTypes[k][1]) &&
                 (tr->extendedDataVector[i+1] == AA_DATA || tr->extendedDataVector[i+1] == BINARY_DATA ||
                  tr->extendedDataVector[i+1] == GENERIC_32 || tr->extendedDataVector[i+1] == GENERIC_64))
                {
                  printf("Secondary Structure only for DNA character positions \n");
                  printf("I am at position %d of the secondary structure file and this is not part of a DNA partition\n", i+1);
                  errorExit(-1);
                }

              if(characters[i] == bracketTypes[k][0])
                {
                  depth++;
                  brackets[k][i] = depth;
                  opening++;
                }
              if(characters[i] == bracketTypes[k][1])
                {
                  brackets[k][i] = depth;
                  depth--;
                  closing++;
                }

              if(closing > opening)
                {
                  printf("at position %d there is a closing bracket too much\n", i+1);
                  errorExit(-1);
                }
            }

          if(depth != 0)
            {
              printf("Problem: Depth: %d\n", depth);
              printf("Your secondary structure file may be missing a closing or opening paraenthesis!\n");
            }
          assert(depth == 0);

          if(countCharacters != sites)
            {
              printf("Problem: sec chars: %d sites: %d\n",countCharacters, sites);
              printf("The number of sites in the alignment does not match the length of the secondary structure file\n");
            }
          assert(countCharacters == sites);

          if(closing != opening)
            {
              printf("Number of opening brackets %d should be equal to number of closing brackets %d\n", opening, closing);
              errorExit(-1);
            }
        }

      /* switch the data type of every paired column */
      for(i = 0, numSecondaryColumns = 0; i < countCharacters; i++)
        {
          int checkSum = 0;

          for(k = 0; k < numberOfSymbols; k++)
            {
              if(brackets[k][i] > 0)
                {
                  checkSum++;

                  switch(tr->secondaryStructureModel)
                    {
                    case SEC_16:
                    case SEC_16_A:
                    case SEC_16_B:
                    case SEC_16_C:
                    case SEC_16_D:
                    case SEC_16_E:
                    case SEC_16_F:
                    case SEC_16_I:
                    case SEC_16_J:
                    case SEC_16_K:
                      tr->extendedDataVector[i+1] = SECONDARY_DATA;
                      break;
                    case SEC_6_A:
                    case SEC_6_B:
                    case SEC_6_C:
                    case SEC_6_D:
                    case SEC_6_E:
                      tr->extendedDataVector[i+1] = SECONDARY_DATA_6;
                      break;
                    case SEC_7_A:
                    case SEC_7_B:
                    case SEC_7_C:
                    case SEC_7_D:
                    case SEC_7_E:
                    case SEC_7_F:
                      tr->extendedDataVector[i+1] = SECONDARY_DATA_7;
                      break;
                    default:
                      assert(0);
                    }

                  numSecondaryColumns++;
                }
            }

          /* a column may carry at most one bracket type */
          assert(checkSum <= 1);
        }

      assert(numSecondaryColumns % 2 == 0);

      tr->numberOfSecondaryColumns = numSecondaryColumns;

      if(numSecondaryColumns > 0)
        {
          int model = tr->NumberOfModels;
          int countPairs;
          pInfo *partBuffer = (pInfo*)rax_malloc(sizeof(pInfo) * tr->NumberOfModels);

          /* paired columns move to the new model index */
          for(i = 1; i <= sites; i++)
            {
              for(k = 0; k < numberOfSymbols; k++)
                {
                  if(brackets[k][i-1] > 0)
                    tr->model[i] = model;
                }
            }

          /* now make a copy of partition data */
          for(i = 0; i < tr->NumberOfModels; i++)
            {
              partBuffer[i].partitionName = (char*)rax_malloc((strlen(tr->extendedPartitionData[i].partitionName) + 1) * sizeof(char));
              strcpy(partBuffer[i].partitionName, tr->extendedPartitionData[i].partitionName);
              strcpy(partBuffer[i].proteinSubstitutionFileName, tr->extendedPartitionData[i].proteinSubstitutionFileName);
              strcpy(partBuffer[i].ascFileName, tr->extendedPartitionData[i].ascFileName);
              partBuffer[i].dataType = tr->extendedPartitionData[i].dataType;
              partBuffer[i].protModels = tr->extendedPartitionData[i].protModels;
              partBuffer[i].usePredefinedProtFreqs = tr->extendedPartitionData[i].usePredefinedProtFreqs;
              partBuffer[i].optimizeBaseFrequencies = tr->extendedPartitionData[i].optimizeBaseFrequencies;
            }

          for(i = 0; i < tr->NumberOfModels; i++)
            rax_free(tr->extendedPartitionData[i].partitionName);
          rax_free(tr->extendedPartitionData);

          /* re-allocate with room for one additional partition */
          tr->extendedPartitionData = (pInfo*)rax_malloc(sizeof(pInfo) * (tr->NumberOfModels + 1));

          for(i = 0; i < tr->NumberOfModels; i++)
            {
              tr->extendedPartitionData[i].partitionName = (char*)rax_malloc((strlen(partBuffer[i].partitionName) + 1) * sizeof(char));
              strcpy(tr->extendedPartitionData[i].partitionName, partBuffer[i].partitionName);
              strcpy(tr->extendedPartitionData[i].proteinSubstitutionFileName, partBuffer[i].proteinSubstitutionFileName);
              strcpy(tr->extendedPartitionData[i].ascFileName, partBuffer[i].ascFileName);
              tr->extendedPartitionData[i].dataType = partBuffer[i].dataType;
              tr->extendedPartitionData[i].protModels= partBuffer[i].protModels;
              tr->extendedPartitionData[i].usePredefinedProtFreqs= partBuffer[i].usePredefinedProtFreqs;
              tr->extendedPartitionData[i].optimizeBaseFrequencies = partBuffer[i].optimizeBaseFrequencies;
              rax_free(partBuffer[i].partitionName);
            }
          rax_free(partBuffer);

          /* after the loop i == tr->NumberOfModels, i.e. the index of the
             newly appended partition */
          /* NOTE(review): optimizeBaseFrequencies and the two file name
             fields of the new entry are not assigned here - confirm that
             they are initialised elsewhere before use */
          tr->extendedPartitionData[i].partitionName = (char*)rax_malloc(64 * sizeof(char));

          switch(tr->secondaryStructureModel)
            {
            case SEC_16:
            case SEC_16_A:
            case SEC_16_B:
            case SEC_16_C:
            case SEC_16_D:
            case SEC_16_E:
            case SEC_16_F:
            case SEC_16_I:
            case SEC_16_J:
            case SEC_16_K:
              strcpy(tr->extendedPartitionData[i].partitionName, "SECONDARY STRUCTURE 16 STATE MODEL");
              tr->extendedPartitionData[i].dataType = SECONDARY_DATA;
              break;
            case SEC_6_A:
            case SEC_6_B:
            case SEC_6_C:
            case SEC_6_D:
            case SEC_6_E:
              strcpy(tr->extendedPartitionData[i].partitionName, "SECONDARY STRUCTURE 6 STATE MODEL");
              tr->extendedPartitionData[i].dataType = SECONDARY_DATA_6;
              break;
            case SEC_7_A:
            case SEC_7_B:
            case SEC_7_C:
            case SEC_7_D:
            case SEC_7_E:
            case SEC_7_F:
              strcpy(tr->extendedPartitionData[i].partitionName, "SECONDARY STRUCTURE 7 STATE MODEL");
              tr->extendedPartitionData[i].dataType = SECONDARY_DATA_7;
              break;
            default:
              assert(0);
            }

          tr->extendedPartitionData[i].protModels= -1;
          tr->extendedPartitionData[i].usePredefinedProtFreqs = FALSE;

          tr->NumberOfModels++;

          if(adef->perGeneBranchLengths)
            {
              if(tr->NumberOfModels > NUM_BRANCHES)
                {
                  printf("You are trying to use %d partitioned models for an individual per-gene branch length estimate.\n", tr->NumberOfModels);
                  printf("Currently only %d are allowed to improve efficiency.\n", NUM_BRANCHES);
                  printf("Note that the number of partitions has automatically been incremented by one to accommodate secondary structure models\n");
                  printf("\n");
                  printf("In order to change this please replace the line \"#define NUM_BRANCHES %d\" in file \"axml.h\" \n", NUM_BRANCHES);
                  printf("by \"#define NUM_BRANCHES %d\" and then re-compile RAxML.\n", tr->NumberOfModels);
                  exit(-1);
                }
              else
                {
                  tr->multiBranch = 1;
                  tr->numBranches = tr->NumberOfModels;
                }
            }

          assert(countCharacters == sites);

          tr->secondaryStructurePairs = (int*)rax_malloc(sizeof(int) * countCharacters);
          for(i = 0; i < countCharacters; i++)
            tr->secondaryStructurePairs[i] = -1;

          countPairs = 0;

          /* match every opening bracket with the next column of the same
             bracket type that has the same nesting depth */
          for(k = 0; k < numberOfSymbols; k++)
            {
              i = 0;

              while(i < countCharacters)
                {
                  int
                    j = i,
                    bracket = -1,
                    openBracket,
                    closeBracket;

                  /* skip columns without a (still unmatched) bracket */
                  while(j < countCharacters && ((bracket = brackets[k][j]) == 0))
                    {
                      i++;
                      j++;
                    }

                  assert(bracket >= 0);

                  if(j == countCharacters)
                    {
                      assert(bracket == 0);
                      break;
                    }

                  openBracket = j;
                  j++;

                  /* Test the bound first so that brackets[k][j] is never read
                     at j == countCharacters. */
                  while(j < countCharacters && bracket != brackets[k][j])
                    j++;

                  assert(j < countCharacters);
                  closeBracket = j;

                  assert(closeBracket < countCharacters && openBracket < countCharacters);
                  assert(brackets[k][closeBracket] > 0 && brackets[k][openBracket] > 0);

                  /* mark both ends as consumed */
                  brackets[k][closeBracket] = 0;
                  brackets[k][openBracket]  = 0;
                  countPairs++;

                  tr->secondaryStructurePairs[closeBracket] = openBracket;
                  tr->secondaryStructurePairs[openBracket] = closeBracket;
                }

              assert(i == countCharacters);
            }

          assert(countPairs == numSecondaryColumns / 2);

          adef->useMultipleModel = TRUE;
        }

      for(k = 0; k < numberOfSymbols; k++)
        rax_free(brackets[k]);
      rax_free(brackets);
      rax_free(characters);

      fclose(f);
    }
}
/*
 * Reads the run of decimal digits starting at *ch, advances *ch past the run
 * and returns the value produced by atoi().
 *
 * The digits are staged in a fixed-size buffer; a run that does not fit is
 * reported together with the offending partition line and terminates the
 * program instead of writing past the end of the buffer.
 *
 * ch   : in/out cursor into the partition line
 * line : the complete partition line, used only for the error message
 */
static int readPartitionNumber(char **ch, const char *line)
{
  char
    buf[256];

  size_t
    l = 0;

  while(isNum(**ch))
    {
      if(l >= sizeof(buf) - 1)
        {
          printf("Number too long in %s\n", line);
          exit(-1);
        }

      buf[l++] = **ch;
      (*ch)++;
    }

  buf[l] = '\0';

  return atoi(buf);
}

/*
 * Parses the partition file named by the global modelFileName and assigns a
 * partition index to every alignment position in tr->model[1..rdta->sites].
 *
 * Every non-blank line describes one partition: an identifier handled by
 * analyzeIdentifier(), followed by a comma separated list of site ranges of
 * the form "lower", "lower-upper" or "lower-upper\stride".
 *
 * For partition i the ranges are collected in partitions[i]:
 *   [0]         number of ranges
 *   [1 + 3 * r] lower bound of range r
 *   [2 + 3 * r] upper bound of range r
 *   [3 + 3 * r] stride of range r, or -1 when no stride was given
 *
 * Side effects: allocates and initializes tr->initialPartitionData, fills
 * tr->model, sets tr->NumberOfModels and, when adef->perGeneBranchLengths is
 * set, tr->multiBranch and tr->numBranches.
 *
 * Any syntax error, an unassigned alignment position, or more partitions than
 * NUM_BRANCHES with per-gene branch lengths prints a message and terminates
 * the program.
 */
void parsePartitions(analdef *adef, rawdata *rdta, tree *tr)
{
  FILE *f;
  int numberOfModels = 0;
  int nbytes = 0;
  char *ch;
  char *cc = (char *)NULL;
  char **p_names;
  int n, i;
  int lower, upper, modulo;
  int **partitions;
  int pairsCount;
  int as, j;
  int k;

  f = myfopen(modelFileName, "rb");

  /* first pass: count the non-blank lines, one partition per line */
  while(myGetline(&cc, &nbytes, f) > -1)
    {
      if(!lineContainsOnlyWhiteChars(cc))
        {
          numberOfModels++;
        }
      if(cc)
        rax_free(cc);
      cc = (char *)NULL;
    }

  rewind(f);

  p_names = (char **)rax_malloc(sizeof(char *) * numberOfModels);
  partitions = (int **)rax_malloc(sizeof(int *) * numberOfModels);

  tr->initialPartitionData = (pInfo*)rax_malloc(sizeof(pInfo) * numberOfModels);

  for(i = 0; i < numberOfModels; i++)
    {
      tr->initialPartitionData[i].protModels = adef->proteinMatrix;
      tr->initialPartitionData[i].usePredefinedProtFreqs = adef->protEmpiricalFreqs;
      tr->initialPartitionData[i].optimizeBaseFrequencies = FALSE;
      tr->initialPartitionData[i].dataType = -1;
    }

  /* NULL so that the first rax_realloc() below starts from an empty record */
  for(i = 0; i < numberOfModels; i++)
    partitions[i] = (int *)NULL;

  /* second pass: keep a private copy of every non-blank line */
  i = 0;
  while(myGetline(&cc, &nbytes, f) > -1)
    {
      if(!lineContainsOnlyWhiteChars(cc))
        {
          n = strlen(cc);
          p_names[i] = (char *)rax_malloc(sizeof(char) * (n + 1));
          strcpy(&(p_names[i][0]), cc);
          i++;
        }
      if(cc)
        rax_free(cc);
      cc = (char *)NULL;
    }

  for(i = 0; i < numberOfModels; i++)
    {
      ch = p_names[i];
      pairsCount = 0;
      skipWhites(&ch);

      if(*ch == '=')
        {
          printf("Identifier missing prior to '=' in %s\n", p_names[i]);
          exit(-1);
        }

      analyzeIdentifier(&ch, i, tr);
      ch++;

    numberPairs:
      /* grow the record by one (lower, upper, stride) triple */
      pairsCount++;
      partitions[i] = (int *)rax_realloc((void *)partitions[i], (1 + 3 * pairsCount) * sizeof(int), FALSE);
      partitions[i][0] = pairsCount;
      partitions[i][3 + 3 * (pairsCount - 1)] = -1;

      skipWhites(&ch);

      if(!isNum(*ch))
        {
          printf("%c Number expected in %s\n", *ch, p_names[i]);
          exit(-1);
        }

      lower = readPartitionNumber(&ch, p_names[i]);
      partitions[i][1 + 3 * (pairsCount - 1)] = lower;

      skipWhites(&ch);

      /* a lone number, i.e. "lower" followed by ',' or end of line, is a range of length one */
      if((*ch != '-') && (*ch != ','))
        {
          if(*ch == '\0' || *ch == '\n' || *ch == '\r')
            {
              upper = lower;
              goto SINGLE_NUMBER;
            }
          else
            {
              printf("'-' or ',' expected in %s\n", p_names[i]);
              exit(-1);
            }
        }

      if(*ch == ',')
        {
          upper = lower;
          goto SINGLE_NUMBER;
        }

      /* skip the '-' */
      ch++;

      skipWhites(&ch);

      if(!isNum(*ch))
        {
          printf("%c Number expected in %s\n", *ch, p_names[i]);
          exit(-1);
        }

      upper = readPartitionNumber(&ch, p_names[i]);

    SINGLE_NUMBER:
      partitions[i][2 + 3 * (pairsCount - 1)] = upper;

      if(upper < lower)
        {
          printf("Upper bound %d smaller than lower bound %d for this partition: %s\n", upper, lower, p_names[i]);
          exit(-1);
        }

      skipWhites(&ch);

      if(*ch == '\0' || *ch == '\n' || *ch == '\r') /* PC-LINEBREAK*/
        {
          goto parsed;
        }

      if(*ch == ',')
        {
          ch++;
          goto numberPairs;
        }

      if(*ch == '\\')
        {
          ch++;
          skipWhites(&ch);

          if(!isNum(*ch))
            {
              printf("%c Number expected in %s\n", *ch, p_names[i]);
              exit(-1);
            }

          if(adef->compressPatterns == FALSE)
            {
              printf("\nError: You are not allowed to use interleaved partitions, that is, assign non-contiguous sites\n");
              printf("to the same partition model, when pattern compression is disabled via the -H flag,\n");
              printf("or when pattern compression is disabled implicitely by some other option that requires it!\n\n");
              exit(-1);
            }

          modulo = readPartitionNumber(&ch, p_names[i]);

          /* a stride of zero would never advance the assignment loop below */
          if(modulo <= 0)
            {
              printf("Stride after '\\' must be greater than zero in %s\n", p_names[i]);
              exit(-1);
            }

          partitions[i][3 + 3 * (pairsCount - 1)] = modulo;

          skipWhites(&ch);
          if(*ch == '\0' || *ch == '\n' || *ch == '\r')
            {
              goto parsed;
            }
          if(*ch == ',')
            {
              ch++;
              goto numberPairs;
            }
        }

      /* anything else at this point is a syntax error */
      if(*ch == '/')
        {
          printf("\nRAxML detected the character \"/\" in your partition file.\n");
          printf("Did you mean to write something similar to this: \"DNA, p1=1-100\\3\" ?\n");
          printf("It's actually a backslash, not a slash, the program will exit now with an error!\n\n");
        }
      else
        {
          printf("\nRAxML detected the character \"%c\" in your partition file,\n", *ch);
          printf("while it does not belong there!\n");
          printf("\nAre you sure that your partition file complies with the RAxML partition file format?\n");
          printf("\nActually reading the manual, does indeed do help a lot\n\n");
          printf("The program will exit now with an error!\n\n");
        }

      printf("The problematic line in your partition file is this one here:\n\n");

      printf("%s\n\n", p_names[i]);

      assert(0);
      /* keep the promised exit even when assertions are compiled out */
      exit(-1);

    parsed:
      ;
    }

  fclose(f);

  /*********************************************************************************************************************/

  for(i = 0; i <= rdta->sites; i++)
    tr->model[i] = -1;

  for(i = 0; i < numberOfModels; i++)
    {
      as = partitions[i][0];

      for(j = 0; j < as; j++)
        {
          lower = partitions[i][1 + j * 3];
          upper = partitions[i][2 + j * 3];
          modulo = partitions[i][3 + j * 3];

          if(modulo == -1)
            {
              /* NOTE(review): unlike the strided case, k is not checked against rdta->sites here - confirm that an upper bound beyond the alignment is rejected elsewhere */
              for(k = lower; k <= upper; k++)
                setModel(i, k, tr->model);
            }
          else
            {
              for(k = lower; k <= upper; k += modulo)
                {
                  if(k <= rdta->sites)
                    setModel(i, k, tr->model);
                }
            }
        }
    }

  for(i = 1; i < rdta->sites + 1; i++)
    {
      if(tr->model[i] == -1)
        {
          printf("ERROR: Alignment Position %d has not been assigned any model\n", i);
          exit(-1);
        }
    }

  for(i = 0; i < numberOfModels; i++)
    {
      rax_free(partitions[i]);
      rax_free(p_names[i]);
    }

  rax_free(partitions);
  rax_free(p_names);

  tr->NumberOfModels = numberOfModels;

  if(adef->perGeneBranchLengths)
    {
      if(tr->NumberOfModels > NUM_BRANCHES)
        {
          printf("You are trying to use %d partitioned models for an individual per-gene branch length estimate.\n", tr->NumberOfModels);
          printf("Currently only %d are allowed to improve efficiency.\n", NUM_BRANCHES);
          printf("\n");
          printf("In order to change this please replace the line \"#define NUM_BRANCHES   %d\" in file \"axml.h\" \n", NUM_BRANCHES);
          printf("by \"#define NUM_BRANCHES   %d\" and then re-compile RAxML.\n", tr->NumberOfModels);
          exit(-1);
        }
      else
        {
          tr->multiBranch = 1;
          tr->numBranches = tr->NumberOfModels;
        }
    }
}
void doAllInOne(tree *tr, analdef *adef) { int i, n, bestIndex, bootstrapsPerformed; #ifdef _WAYNE_MPI int bootStopTests = 1, j, bootStrapsPerProcess = 0; #endif double loopTime; int *originalRateCategories; int *originalInvariant; #ifdef _WAYNE_MPI int slowSearches, fastEvery; #else int slowSearches, fastEvery = 5; #endif int treeVectorLength = -1; topolRELL_LIST *rl; double bestLH, mlTime, overallTime; long radiusSeed = adef->rapidBoot; FILE *f; char bestTreeFileName[1024]; hashtable *h = (hashtable*)NULL; unsigned int **bitVectors = (unsigned int**)NULL; boolean bootStopIt = FALSE; double pearsonAverage = 0.0; pInfo *catParams = allocParams(tr); pInfo *gammaParams = allocParams(tr); unsigned int vLength; n = adef->multipleRuns; #ifdef _WAYNE_MPI if(n % processes != 0) n = processes * ((n / processes) + 1); #endif if(adef->bootStopping) { h = initHashTable(tr->mxtips * 100); treeVectorLength = adef->multipleRuns; bitVectors = initBitVector(tr, &vLength); } rl = (topolRELL_LIST *)rax_malloc(sizeof(topolRELL_LIST)); initTL(rl, tr, n); originalRateCategories = (int*)rax_malloc(tr->cdta->endsite * sizeof(int)); originalInvariant = (int*)rax_malloc(tr->cdta->endsite * sizeof(int)); initModel(tr, tr->rdta, tr->cdta, adef); if(adef->grouping) printBothOpen("\n\nThe topologies of all Bootstrap and ML trees will adhere to the constraint tree specified in %s\n", tree_file); if(adef->constraint) printBothOpen("\n\nThe topologies of all Bootstrap and ML trees will adhere to the bifurcating backbone constraint tree specified in %s\n", tree_file); #ifdef _WAYNE_MPI long parsimonySeed0 = adef->parsimonySeed; long replicateSeed0 = adef->rapidBoot; n = n / processes; #endif for(i = 0; i < n && !bootStopIt; i++) { #ifdef _WAYNE_MPI j = i + n * processID; tr->treeID = j; #else tr->treeID = i; #endif tr->checkPointCounter = 0; loopTime = gettime(); #ifdef _WAYNE_MPI if(i == 0) { if(parsimonySeed0 != 0) adef->parsimonySeed = parsimonySeed0 + 10000 * processID; adef->rapidBoot = 
replicateSeed0 + 10000 * processID; radiusSeed = adef->rapidBoot; } #endif if(i % 10 == 0) { if(i > 0) reductionCleanup(tr, originalRateCategories, originalInvariant); if(adef->grouping || adef->constraint) { FILE *f = myfopen(tree_file, "rb"); assert(adef->restart); if (! treeReadLenMULT(f, tr, adef)) exit(-1); fclose(f); } else makeParsimonyTree(tr, adef); tr->likelihood = unlikely; if(i == 0) { double t; onlyInitrav(tr, tr->start); treeEvaluate(tr, 1); t = gettime(); modOpt(tr, adef, FALSE, 5.0); #ifdef _WAYNE_MPI printBothOpen("\nTime for BS model parameter optimization on Process %d: %f seconds\n", processID, gettime() - t); #else printBothOpen("\nTime for BS model parameter optimization %f\n", gettime() - t); #endif memcpy(originalRateCategories, tr->cdta->rateCategory, sizeof(int) * tr->cdta->endsite); memcpy(originalInvariant, tr->invariant, sizeof(int) * tr->cdta->endsite); if(adef->bootstrapBranchLengths) { if(tr->rateHetModel == CAT) { copyParams(tr->NumberOfModels, catParams, tr->partitionData, tr); assert(tr->cdta->endsite == tr->originalCrunchedLength); catToGamma(tr, adef); modOpt(tr, adef, TRUE, adef->likelihoodEpsilon); copyParams(tr->NumberOfModels, gammaParams, tr->partitionData, tr); gammaToCat(tr); copyParams(tr->NumberOfModels, tr->partitionData, catParams, tr); } else { assert(tr->cdta->endsite == tr->originalCrunchedLength); } } } } computeNextReplicate(tr, &adef->rapidBoot, originalRateCategories, originalInvariant, TRUE, TRUE); resetBranches(tr); evaluateGenericInitrav(tr, tr->start); treeEvaluate(tr, 1); computeBOOTRAPID(tr, adef, &radiusSeed); #ifdef _WAYNE_MPI saveTL(rl, tr, j); #else saveTL(rl, tr, i); #endif if(adef->bootstrapBranchLengths) { double lh = tr->likelihood; if(tr->rateHetModel == CAT) { copyParams(tr->NumberOfModels, tr->partitionData, gammaParams, tr); catToGamma(tr, adef); resetBranches(tr); onlyInitrav(tr, tr->start); treeEvaluate(tr, 2.0); gammaToCat(tr); copyParams(tr->NumberOfModels, tr->partitionData, catParams, 
tr); tr->likelihood = lh; } else { treeEvaluate(tr, 2.0); tr->likelihood = lh; } } printBootstrapResult(tr, adef, TRUE); loopTime = gettime() - loopTime; writeInfoFile(adef, tr, loopTime); if(adef->bootStopping) #ifdef _WAYNE_MPI { int nn = (i + 1) * processes; if((nn > START_BSTOP_TEST) && (i * processes < FC_SPACING * bootStopTests) && ((i + 1) * processes >= FC_SPACING * bootStopTests) ) { MPI_Barrier(MPI_COMM_WORLD); concatenateBSFiles(processes, bootstrapFileName); MPI_Barrier(MPI_COMM_WORLD); bootStopIt = computeBootStopMPI(tr, bootstrapFileName, adef, &pearsonAverage); bootStopTests++; } } #else bootStopIt = bootStop(tr, h, i, &pearsonAverage, bitVectors, treeVectorLength, vLength, adef); #endif } #ifdef _WAYNE_MPI MPI_Barrier(MPI_COMM_WORLD); bootstrapsPerformed = i * processes; bootStrapsPerProcess = i; concatenateBSFiles(processes, bootstrapFileName); removeBSFiles(processes, bootstrapFileName); MPI_Barrier(MPI_COMM_WORLD); #else bootstrapsPerformed = i; #endif rax_freeParams(tr->NumberOfModels, catParams); rax_free(catParams); rax_freeParams(tr->NumberOfModels, gammaParams); rax_free(gammaParams); if(adef->bootStopping) { freeBitVectors(bitVectors, 2 * tr->mxtips); rax_free(bitVectors); freeHashTable(h); rax_free(h); } { double t; printBothOpenMPI("\n\n"); if(adef->bootStopping) { if(bootStopIt) { switch(tr->bootStopCriterion) { case FREQUENCY_STOP: printBothOpenMPI("Stopped Rapid BS search after %d replicates with FC Bootstopping criterion\n", bootstrapsPerformed); printBothOpenMPI("Pearson Average of %d random splits: %f\n",BOOTSTOP_PERMUTATIONS , pearsonAverage); break; case MR_STOP: printBothOpenMPI("Stopped Rapid BS search after %d replicates with MR-based Bootstopping criterion\n", bootstrapsPerformed); printBothOpenMPI("WRF Average of %d random splits: %f\n", BOOTSTOP_PERMUTATIONS, pearsonAverage); break; case MRE_STOP: printBothOpenMPI("Stopped Rapid BS search after %d replicates with MRE-based Bootstopping criterion\n", bootstrapsPerformed); 
printBothOpenMPI("WRF Average of %d random splits: %f\n", BOOTSTOP_PERMUTATIONS, pearsonAverage); break; case MRE_IGN_STOP: printBothOpenMPI("Stopped Rapid BS search after %d replicates with MRE_IGN-based Bootstopping criterion\n", bootstrapsPerformed); printBothOpenMPI("WRF Average of %d random splits: %f\n", BOOTSTOP_PERMUTATIONS, pearsonAverage); break; default: assert(0); } } else { switch(tr->bootStopCriterion) { case FREQUENCY_STOP: printBothOpenMPI("Rapid BS search did not converge after %d replicates with FC Bootstopping criterion\n", bootstrapsPerformed); printBothOpenMPI("Pearson Average of %d random splits: %f\n",BOOTSTOP_PERMUTATIONS , pearsonAverage); break; case MR_STOP: printBothOpenMPI("Rapid BS search did not converge after %d replicates with MR-based Bootstopping criterion\n", bootstrapsPerformed); printBothOpenMPI("WRF Average of %d random splits: %f\n", BOOTSTOP_PERMUTATIONS, pearsonAverage); break; case MRE_STOP: printBothOpenMPI("Rapid BS search did not converge after %d replicates with MRE-based Bootstopping criterion\n", bootstrapsPerformed); printBothOpenMPI("WRF Average of %d random splits: %f\n", BOOTSTOP_PERMUTATIONS, pearsonAverage); break; case MRE_IGN_STOP: printBothOpenMPI("Rapid BS search did not converge after %d replicates with MR_IGN-based Bootstopping criterion\n", bootstrapsPerformed); printBothOpenMPI("WRF Average of %d random splits: %f\n", BOOTSTOP_PERMUTATIONS, pearsonAverage); break; default: assert(0); } } } t = gettime() - masterTime; printBothOpenMPI("Overall Time for %d Rapid Bootstraps %f seconds\n", bootstrapsPerformed, t); printBothOpenMPI("Average Time per Rapid Bootstrap %f seconds\n", (double)(t/((double)bootstrapsPerformed))); if(!adef->allInOne) { printBothOpenMPI("All %d bootstrapped trees written to: %s\n", bootstrapsPerformed, bootstrapFileName); #ifdef _WAYNE_MPI MPI_Finalize(); #endif exit(0); } } /* ML-search */ mlTime = gettime(); double t = mlTime; printBothOpenMPI("\nStarting ML Search ...\n\n"); 
/***CLEAN UP reduction stuff */ reductionCleanup(tr, originalRateCategories, originalInvariant); /****/ #ifdef _WAYNE_MPI restoreTL(rl, tr, n * processID); #else restoreTL(rl, tr, 0); #endif resetBranches(tr); evaluateGenericInitrav(tr, tr->start); modOpt(tr, adef, TRUE, adef->likelihoodEpsilon); #ifdef _WAYNE_MPI if(bootstrapsPerformed <= 100) fastEvery = 5; else fastEvery = bootstrapsPerformed / 20; for(i = 0; i < bootstrapsPerformed; i++) rl->t[i]->likelihood = unlikely; for(i = 0; i < bootStrapsPerProcess; i++) { j = i + n * processID; if(i % fastEvery == 0) { restoreTL(rl, tr, j); resetBranches(tr); evaluateGenericInitrav(tr, tr->start); treeEvaluate(tr, 1); optimizeRAPID(tr, adef); saveTL(rl, tr, j); } } #else for(i = 0; i < bootstrapsPerformed; i++) { rl->t[i]->likelihood = unlikely; if(i % fastEvery == 0) { restoreTL(rl, tr, i); resetBranches(tr); evaluateGenericInitrav(tr, tr->start); treeEvaluate(tr, 1); optimizeRAPID(tr, adef); saveTL(rl, tr, i); } } #endif printBothOpenMPI("Fast ML optimization finished\n\n"); t = gettime() - t; #ifdef _WAYNE_MPI printBothOpen("Fast ML search on Process %d: Time %f seconds\n\n", processID, t); j = n * processID; qsort(&(rl->t[j]), n, sizeof(topolRELL*), compareTopolRell); restoreTL(rl, tr, j); #else printBothOpen("Fast ML search Time: %f seconds\n\n", t); qsort(&(rl->t[0]), bootstrapsPerformed, sizeof(topolRELL*), compareTopolRell); restoreTL(rl, tr, 0); #endif t = gettime(); resetBranches(tr); evaluateGenericInitrav(tr, tr->start); modOpt(tr, adef, TRUE, adef->likelihoodEpsilon); slowSearches = bootstrapsPerformed / 5; if(bootstrapsPerformed % 5 != 0) slowSearches++; slowSearches = MIN(slowSearches, 10); #ifdef _WAYNE_MPI if(processes > 1) { if(slowSearches % processes == 0) slowSearches = slowSearches / processes; else slowSearches = (slowSearches / processes) + 1; } for(i = 0; i < slowSearches; i++) { j = i + n * processID; restoreTL(rl, tr, j); rl->t[j]->likelihood = unlikely; evaluateGenericInitrav(tr, tr->start); 
treeEvaluate(tr, 1.0); thoroughOptimization(tr, adef, rl, j); } #else for(i = 0; i < slowSearches; i++) { restoreTL(rl, tr, i); rl->t[i]->likelihood = unlikely; evaluateGenericInitrav(tr, tr->start); treeEvaluate(tr, 1.0); thoroughOptimization(tr, adef, rl, i); } #endif /*************************************************************************************************************/ if(tr->rateHetModel == CAT) { catToGamma(tr, adef); modOpt(tr, adef, TRUE, adef->likelihoodEpsilon); } bestIndex = -1; bestLH = unlikely; #ifdef _WAYNE_MPI for(i = 0; i < slowSearches; i++) { j = i + n * processID; restoreTL(rl, tr, j); resetBranches(tr); evaluateGenericInitrav(tr, tr->start); treeEvaluate(tr, 2); printBothOpen("Slow ML Search %d Likelihood: %f\n", j, tr->likelihood); if(tr->likelihood > bestLH) { bestLH = tr->likelihood; bestIndex = j; } } /*printf("processID = %d, bestIndex = %d; bestLH = %f\n", processID, bestIndex, bestLH);*/ #else for(i = 0; i < slowSearches; i++) { restoreTL(rl, tr, i); resetBranches(tr); evaluateGenericInitrav(tr, tr->start); treeEvaluate(tr, 2); printBothOpen("Slow ML Search %d Likelihood: %f\n", i, tr->likelihood); if(tr->likelihood > bestLH) { bestLH = tr->likelihood; bestIndex = i; } } #endif printBothOpenMPI("Slow ML optimization finished\n\n"); t = gettime() - t; #ifdef _WAYNE_MPI printBothOpen("Slow ML search on Process %d: Time %f seconds\n", processID, t); #else printBothOpen("Slow ML search Time: %f seconds\n", t); #endif t = gettime(); restoreTL(rl, tr, bestIndex); resetBranches(tr); evaluateGenericInitrav(tr, tr->start); treeEvaluate(tr, 2); Thorough = 1; tr->doCutoff = FALSE; treeOptimizeThorough(tr, 1, 10); evaluateGenericInitrav(tr, tr->start); modOpt(tr, adef, TRUE, adef->likelihoodEpsilon); t = gettime() - t; #ifdef _WAYNE_MPI printBothOpen("Thorough ML search on Process %d: Time %f seconds\n", processID, t); #else printBothOpen("Thorough ML search Time: %f seconds\n", t); #endif #ifdef _WAYNE_MPI bestLH = tr->likelihood; 
printf("\nprocessID = %d, bestLH = %f\n", processID, bestLH); if(processes > 1) { double *buffer; int bestProcess; buffer = (double *)rax_malloc(sizeof(double) * processes); for(i = 0; i < processes; i++) buffer[i] = unlikely; buffer[processID] = bestLH; for(i = 0; i < processes; i++) MPI_Bcast(&buffer[i], 1, MPI_DOUBLE, i, MPI_COMM_WORLD); bestLH = buffer[0]; bestProcess = 0; for(i = 1; i < processes; i++) if(buffer[i] > bestLH) { bestLH = buffer[i]; bestProcess = i; } rax_free(buffer); if(processID != bestProcess) { MPI_Finalize(); exit(0); } } #endif printBothOpen("\nFinal ML Optimization Likelihood: %f\n", tr->likelihood); printBothOpen("\nModel Information:\n\n"); printModelParams(tr, adef); strcpy(bestTreeFileName, workdir); strcat(bestTreeFileName, "RAxML_bestTree."); strcat(bestTreeFileName, run_id); Tree2String(tr->tree_string, tr, tr->start->back, TRUE, TRUE, FALSE, FALSE, TRUE, adef, SUMMARIZE_LH, FALSE, FALSE, FALSE, FALSE); f = myfopen(bestTreeFileName, "wb"); fprintf(f, "%s", tr->tree_string); fclose(f); if(adef->perGeneBranchLengths) printTreePerGene(tr, adef, bestTreeFileName, "w"); overallTime = gettime() - masterTime; mlTime = gettime() - mlTime; printBothOpen("\nML search took %f secs or %f hours\n", mlTime, mlTime / 3600.0); printBothOpen("\nCombined Bootstrap and ML search took %f secs or %f hours\n", overallTime, overallTime / 3600.0); printBothOpen("\nDrawing Bootstrap Support Values on best-scoring ML tree ...\n\n"); freeTL(rl); rax_free(rl); calcBipartitions(tr, adef, bestTreeFileName, bootstrapFileName); overallTime = gettime() - masterTime; printBothOpen("Program execution info written to %s\n", infoFileName); printBothOpen("All %d bootstrapped trees written to: %s\n\n", bootstrapsPerformed, bootstrapFileName); printBothOpen("Best-scoring ML tree written to: %s\n\n", bestTreeFileName); if(adef->perGeneBranchLengths && tr->NumberOfModels > 1) printBothOpen("Per-Partition branch lengths of best-scoring ML tree written to %s.PARTITION.0 to 
%s.PARTITION.%d\n\n", bestTreeFileName, bestTreeFileName, tr->NumberOfModels - 1); printBothOpen("Best-scoring ML tree with support values written to: %s\n\n", bipartitionsFileName); printBothOpen("Best-scoring ML tree with support values as branch labels written to: %s\n\n", bipartitionsFileNameBranchLabels); printBothOpen("Overall execution time for full ML analysis: %f secs or %f hours or %f days\n\n", overallTime, overallTime/3600.0, overallTime/86400.0); #ifdef _WAYNE_MPI MPI_Finalize(); #endif exit(0); }
/*
 * Computes the vector of the inner node ti->pNumber for a single site from
 * the vectors of its two descendants ti->qNumber and ti->rNumber and stores
 * it in lVector.
 *
 * lVector   : per-node vectors of `states` doubles, indexed by
 *             (node number - mxtips)
 * eVector   : scaling counter, incremented when the result had to be rescaled
 * ki        : factor applied to both branch values
 * i         : site index into the rows of yVector
 * qz, rz    : branch values towards the q and r descendants
 * ti        : traversal entry naming the three nodes and the tip case
 * EIGN, EI, EV : model arrays with `states` (EIGN) and states * states
 *             (EI, EV) entries, as indexed below
 * tipVector : tip vectors of `states` doubles, indexed by the code found in
 *             yVector
 * yVector   : per-node sequence data, used for tip descendants
 * states    : number of states of the model
 *
 * The three scratch buffers are obtained with rax_malloc() so that they match
 * the rax_free() calls that release them.  The operand vectors are resolved
 * before anything is allocated, so an unknown tip case neither leaks memory
 * nor reads through an uninitialized pointer when assertions are disabled.
 */
static inline void computeVectorCAT_FLEX(double *lVector, int *eVector, double ki, int i, double qz, double rz, traversalInfo *ti, double *EIGN, double *EI, double *EV, double *tipVector, unsigned char **yVector, int mxtips, const int states)
{
  double
    *d1,
    *d2,
    *x1px2,
    ump_x1,
    ump_x2,
    lz1,
    lz2,
    *x1,
    *x2,
    *x3;

  int
    scale,
    j,
    k,
    pNumber = ti->pNumber,
    rNumber = ti->rNumber,
    qNumber = ti->qNumber;

  x3 = &lVector[states * (pNumber - mxtips)];

  switch(ti->tipCase)
    {
    case TIP_TIP:
      x1 = &(tipVector[states * yVector[qNumber][i]]);
      x2 = &(tipVector[states * yVector[rNumber][i]]);
      break;
    case TIP_INNER:
      x1 = &(tipVector[states * yVector[qNumber][i]]);
      x2 = &(lVector[states * (rNumber - mxtips)]);
      break;
    case INNER_INNER:
      x1 = &(lVector[states * (qNumber - mxtips)]);
      x2 = &(lVector[states * (rNumber - mxtips)]);
      break;
    default:
      assert(0);
      return;
    }

  d1 = (double *)rax_malloc(sizeof(double) * states);
  d2 = (double *)rax_malloc(sizeof(double) * states);
  x1px2 = (double *)rax_malloc(sizeof(double) * states);

  lz1 = qz * ki;
  lz2 = rz * ki;

  /* entry 0 is copied unchanged, the remaining entries are damped by EXP(EIGN[j] * lz) */
  d1[0] = x1[0];
  d2[0] = x2[0];

  for(j = 1; j < states; j++)
    {
      d1[j] = x1[j] * EXP(EIGN[j] * lz1);
      d2[j] = x2[j] * EXP(EIGN[j] * lz2);
    }

  /* multiply both damped vectors with EI and combine them element-wise */
  for(j = 0; j < states; j++)
    {
      ump_x1 = 0.0;
      ump_x2 = 0.0;

      for(k = 0; k < states; k++)
        {
          ump_x1 += d1[k] * EI[j * states + k];
          ump_x2 += d2[k] * EI[j * states + k];
        }

      x1px2[j] = ump_x1 * ump_x2;
    }

  for(j = 0; j < states; j++)
    x3[j] = 0.0;

  /* multiply the product with EV to obtain the parent vector */
  for(j = 0; j < states; j++)
    for(k = 0; k < states; k++)
      x3[k] += x1px2[j] * EV[states * j + k];

  /* rescale only if every entry lies strictly between minusminlikelihood and minlikelihood */
  scale = 1;
  for(j = 0; scale && (j < states); j++)
    scale = ((x3[j] < minlikelihood) && (x3[j] > minusminlikelihood));

  if(scale)
    {
      for(j = 0; j < states; j++)
        x3[j] *= twotothe256;
      *eVector = *eVector + 1;
    }

  rax_free(d1);
  rax_free(d2);
  rax_free(x1px2);

  return;
}
/*
 * Draws the next bootstrap replicate and compacts the alignment data to the
 * site patterns that received a non-zero weight.
 *
 * For every partition the function draws, with randum(randomSeed), as many
 * hits as the partition has original weight and adds them up per pattern in
 * tr->cdta->aliaswgt.  Patterns with weight zero are then squeezed out of
 * tr->rdta->y0 (copied from tr->rdta->yBUF), aliaswgt, dataVector and model;
 * with isRapid the rate categories and invariant entries are compacted from
 * the supplied original arrays as well.  Finally tr->cdta->endsite is set to
 * the number of surviving patterns and fixModelIndices() is called.
 *
 * The sum of the replicate weights must equal tr->rdta->sites; a mismatch is
 * printed and trips an assertion.
 */
void computeNextReplicate(tree *tr, long *randomSeed, int *originalRateCategories, int *originalInvariant, boolean isRapid, boolean fixRates)
{
  int
    *wgt = tr->cdta->aliaswgt,
    site,
    part,
    taxon,
    kept,
    survivors = 0,
    total = 0;

  /* every pattern starts with weight zero */
  site = 0;
  while(site < tr->originalCrunchedLength)
    wgt[site++] = 0;

  /* resample each partition on its own so that its total weight is preserved */
  for(part = 0; part < tr->NumberOfModels; part++)
    {
      int
        draws = 0,
        cursor = 0,
        d,
        *hits;

      for(site = 0; site < tr->originalCrunchedLength; site++)
        if(tr->originalModel[site] == part)
          draws += tr->originalWeights[site];

      hits = (int *)rax_calloc(draws, sizeof(int));

      for(d = 0; d < draws; d++)
        {
          int
            slot = (int) (draws*randum(randomSeed));

          hits[slot]++;
        }

      /* a pattern of original weight w owns w consecutive slots of the hit buffer */
      for(site = 0; site < tr->originalCrunchedLength; site++)
        {
          int
            copy;

          if(part != tr->originalModel[site])
            continue;

          for(copy = 0; copy < tr->originalWeights[site]; copy++)
            {
              wgt[site] += hits[cursor];
              cursor++;
            }
        }

      rax_free(hits);
    }

  /* number of patterns that are present in this replicate */
  for(site = 0; site < tr->originalCrunchedLength; site++)
    if(wgt[site] > 0)
      survivors++;

  /* rebuild each sequence from its backup, keeping only the surviving patterns */
  for(taxon = 0; taxon < tr->rdta->numsp; taxon++)
    {
      size_t
        offset = ((size_t)tr->originalCrunchedLength) * ((size_t)taxon);

      unsigned char
        *target = &(tr->rdta->y0[offset]),
        *backup = &(tr->rdta->yBUF[offset]);

      kept = 0;

      for(site = 0; site < tr->originalCrunchedLength; site++)
        {
          if(wgt[site] > 0)
            {
              target[kept] = backup[site];
              kept++;
            }
        }
    }

  /* compact the per-pattern arrays in place; kept never runs ahead of site */
  kept = 0;

  for(site = 0; site < tr->originalCrunchedLength; site++)
    {
      if(!wgt[site])
        continue;

      wgt[kept] = wgt[site];
      tr->dataVector[kept] = tr->originalDataVector[site];
      tr->model[kept] = tr->originalModel[site];

      if(isRapid)
        {
          tr->cdta->rateCategory[kept] = originalRateCategories[site];
          tr->invariant[kept] = originalInvariant[site];
        }

      kept++;
    }

  tr->cdta->endsite = survivors;
  fixModelIndices(tr, survivors, fixRates);

  /* sanity check: the replicate has to cover exactly as many sites as the alignment */
  for(site = 0; site < tr->cdta->endsite; site++)
    total += tr->cdta->aliaswgt[site];

  if(total != tr->rdta->sites)
    printf("count=%d\ttr->rdta->sites=%d\n",total, tr->rdta->sites );
  assert(total == tr->rdta->sites);
}
/*
 * Runs adef->multipleRuns independent tree searches and reports the best one.
 *
 * Every run calls initModel(), getStartingTree() and computeBIGRAPID().  Unless
 * tr->catOnly is set the resulting topologies are stored in a topolRELL_LIST,
 * re-scored under a GAMMA-based model, and the best-scoring tree is optimized
 * thoroughly and written to RAxML_bestTree.<run_id>.  With adef->rellBootstrap
 * the RELL bootstrap trees are written afterwards.
 *
 * Under _WAYNE_MPI the runs are split evenly across processes (the run count
 * is rounded up to a multiple of the process count) and only the process that
 * holds the best likelihood writes the final tree and the closing summary.
 * bestProcess starts at 0 so that it is defined for single-process runs and
 * for catOnly runs, where the likelihood exchange below is skipped and only
 * process 0 is still running.
 *
 * This function does not return: it ends in exit(0).
 */
void doInference(tree *tr, analdef *adef, rawdata *rdta, cruncheddata *cdta)
{
  int
    i,
    n;

#ifdef _WAYNE_MPI
  int
    j,
    bestProcess = 0;
#endif

  double
    loopTime;

  topolRELL_LIST
    *rl = (topolRELL_LIST *)NULL;

  int
    best = -1,
    newBest = -1;

  double
    bestLH = unlikely;

  FILE
    *f;

  char
    bestTreeFileName[1024];

  double
    overallTime;

  n = adef->multipleRuns;

#ifdef _WAYNE_MPI
  /* round up so that every process gets the same number of runs */
  if(n % processes != 0)
    n = processes * ((n / processes) + 1);
#endif

  if(!tr->catOnly)
    {
      rl = (topolRELL_LIST *)rax_malloc(sizeof(topolRELL_LIST));
      initTL(rl, tr, n);
    }

#ifdef _WAYNE_MPI
  long parsimonySeed0 = adef->parsimonySeed;
  n = n / processes;
#endif

  if(adef->rellBootstrap)
    {
#ifdef _WAYNE_MPI
      tr->resample = permutationSH(tr, NUM_RELL_BOOTSTRAPS, parsimonySeed0 + 10000 * processID);
#else
      tr->resample = permutationSH(tr, NUM_RELL_BOOTSTRAPS, adef->parsimonySeed);
#endif

      tr->rellTrees = (treeList *)rax_malloc(sizeof(treeList));
      initTreeList(tr->rellTrees, tr, NUM_RELL_BOOTSTRAPS);
    }
  else
    {
      tr->resample = (int *)NULL;
      tr->rellTrees = (treeList *)NULL;
    }

  /* ---------------- independent tree searches ---------------- */

  for(i = 0; i < n; i++)
    {
#ifdef _WAYNE_MPI
      if(i == 0)
        {
          if(parsimonySeed0 != 0)
            adef->parsimonySeed = parsimonySeed0 + 10000 * processID;
        }
      j = i + n * processID;
      tr->treeID = j;
#else
      tr->treeID = i;
#endif

      tr->checkPointCounter = 0;

      loopTime = gettime();

      initModel(tr, rdta, cdta, adef);

      if(i == 0)
        printBaseFrequencies(tr);

      getStartingTree(tr, adef);

      computeBIGRAPID(tr, adef, TRUE);

#ifdef _WAYNE_MPI
      if(tr->likelihood > bestLH)
        {
          best = j;
          bestLH = tr->likelihood;
        }

      if(!tr->catOnly)
        saveTL(rl, tr, j);
#else
      if(tr->likelihood > bestLH)
        {
          best = i;
          bestLH = tr->likelihood;
        }

      if(!tr->catOnly)
        saveTL(rl, tr, i);
#endif

      loopTime = gettime() - loopTime;
      writeInfoFile(adef, tr, loopTime);
    }

  assert(best >= 0);

#ifdef _WAYNE_MPI
  MPI_Barrier(MPI_COMM_WORLD);
  n = n * processes;
#endif

  if(tr->catOnly)
    {
      printBothOpenMPI("\n\nNOT conducting any final model optimizations on all %d trees under CAT-based model ....\n", n);
      printBothOpenMPI("\nREMEMBER that CAT-based likelihood scores are meaningless!\n\n");
#ifdef _WAYNE_MPI
      /* NOTE(review): the other processes leave here, yet the RELL section below still calls MPI_Barrier - confirm catOnly is never combined with rellBootstrap under MPI */
      if(processID != 0)
        {
          MPI_Finalize();
          exit(0);
        }
#endif
    }
  else
    {
      printBothOpenMPI("\n\nConducting final model optimizations on all %d trees under GAMMA-based models ....\n\n", n);

#ifdef _WAYNE_MPI
      n = n / processes;
#endif

      if(tr->rateHetModel == GAMMA || tr->rateHetModel == GAMMA_I)
        {
          /* optimize the model on the best tree first, then merely re-evaluate the others */
          restoreTL(rl, tr, best);
          evaluateGenericInitrav(tr, tr->start);
          if(!adef->useBinaryModelFile)
            modOpt(tr, adef, FALSE, adef->likelihoodEpsilon);
          else
            {
              readBinaryModel(tr, adef);
              evaluateGenericInitrav(tr, tr->start);
              treeEvaluate(tr, 2);
            }
          bestLH = tr->likelihood;
          tr->likelihoods[best] = tr->likelihood;
          saveTL(rl, tr, best);
          tr->treeID = best;
          printResult(tr, adef, TRUE);
          newBest = best;

          for(i = 0; i < n; i++)
            {
#ifdef _WAYNE_MPI
              j = i + n * processID;
              if(j != best)
                {
                  restoreTL(rl, tr, j);
                  evaluateGenericInitrav(tr, tr->start);
                  treeEvaluate(tr, 1);
                  tr->likelihoods[j] = tr->likelihood;

                  if(tr->likelihood > bestLH)
                    {
                      newBest = j;
                      bestLH = tr->likelihood;
                      saveTL(rl, tr, j);
                    }
                  tr->treeID = j;
                  printResult(tr, adef, TRUE);
                }
              if(n == 1 && processes == 1)
                printBothOpen("Inference[%d] final GAMMA-based Likelihood: %f tree written to file %s\n", i, tr->likelihoods[i], resultFileName);
              else
                printBothOpen("Inference[%d] final GAMMA-based Likelihood: %f tree written to file %s.RUN.%d\n", j, tr->likelihoods[j], resultFileName, j);
#else
              if(i != best)
                {
                  restoreTL(rl, tr, i);
                  evaluateGenericInitrav(tr, tr->start);
                  treeEvaluate(tr, 1);
                  tr->likelihoods[i] = tr->likelihood;

                  if(tr->likelihood > bestLH)
                    {
                      newBest = i;
                      bestLH = tr->likelihood;
                      saveTL(rl, tr, i);
                    }
                  tr->treeID = i;
                  printResult(tr, adef, TRUE);
                }

              if(n == 1)
                printBothOpen("Inference[%d] final GAMMA-based Likelihood: %f tree written to file %s\n", i, tr->likelihoods[i], resultFileName);
              else
                printBothOpen("Inference[%d] final GAMMA-based Likelihood: %f tree written to file %s.RUN.%d\n", i, tr->likelihoods[i], resultFileName, i);
#endif
            }
        }
      else
        {
          /* the searches ran under another rate model: switch to GAMMA and score every tree again */
          catToGamma(tr, adef);

#ifdef _WAYNE_MPI
          for(i = 0; i < n; i++)
            {
              j = i + n*processID;
              rl->t[j]->likelihood = unlikely;
            }
#else
          for(i = 0; i < n; i++)
            rl->t[i]->likelihood = unlikely;
#endif

          initModel(tr, rdta, cdta, adef);

          restoreTL(rl, tr, best);

          resetBranches(tr);
          evaluateGenericInitrav(tr, tr->start);
          modOpt(tr, adef, TRUE, adef->likelihoodEpsilon);
          tr->likelihoods[best] = tr->likelihood;
          bestLH = tr->likelihood;
          saveTL(rl, tr, best);
          tr->treeID = best;
          printResult(tr, adef, TRUE);
          newBest = best;

          for(i = 0; i < n; i++)
            {
#ifdef _WAYNE_MPI
              j = i + n*processID;
              if(j != best)
                {
                  restoreTL(rl, tr, j);
                  resetBranches(tr);
                  evaluateGenericInitrav(tr, tr->start);
                  treeEvaluate(tr, 2);
                  tr->likelihoods[j] = tr->likelihood;

                  if(tr->likelihood > bestLH)
                    {
                      newBest = j;
                      bestLH = tr->likelihood;
                      saveTL(rl, tr, j);
                    }
                  tr->treeID = j;
                  printResult(tr, adef, TRUE);
                }

              if(n == 1 && processes == 1)
                printBothOpen("Inference[%d] final GAMMA-based Likelihood: %f tree written to file %s\n", i, tr->likelihoods[i], resultFileName);
              else
                printBothOpen("Inference[%d] final GAMMA-based Likelihood: %f tree written to file %s.RUN.%d\n", j, tr->likelihoods[j], resultFileName, j);
#else
              if(i != best)
                {
                  restoreTL(rl, tr, i);
                  resetBranches(tr);
                  evaluateGenericInitrav(tr, tr->start);
                  treeEvaluate(tr, 2);
                  tr->likelihoods[i] = tr->likelihood;

                  if(tr->likelihood > bestLH)
                    {
                      newBest = i;
                      bestLH = tr->likelihood;
                      saveTL(rl, tr, i);
                    }
                  tr->treeID = i;
                  printResult(tr, adef, TRUE);
                }

              if(n == 1)
                printBothOpen("Inference[%d] final GAMMA-based Likelihood: %f tree written to file %s\n", i, tr->likelihoods[i], resultFileName);
              else
                printBothOpen("Inference[%d] final GAMMA-based Likelihood: %f tree written to file %s.RUN.%d\n", i, tr->likelihoods[i], resultFileName, i);
#endif
            }
        }

      assert(newBest >= 0);

#ifdef _WAYNE_MPI
      /* exchange the per-process scores to find the process that owns the best tree */
      if(processes > 1)
        {
          double
            *buffer = (double *)rax_malloc(sizeof(double) * processes);

          for(i = 0; i < processes; i++)
            buffer[i] = unlikely;
          buffer[processID] = bestLH;
          for(i = 0; i < processes; i++)
            MPI_Bcast(&buffer[i], 1, MPI_DOUBLE, i, MPI_COMM_WORLD);
          bestLH = buffer[0];
          bestProcess = 0;
          for(i = 1; i < processes; i++)
            if(buffer[i] > bestLH)
              {
                bestLH = buffer[i];
                bestProcess = i;
              }
          rax_free(buffer);
        }

      if(processID == bestProcess)
        {
#endif
          restoreTL(rl, tr, newBest);
          evaluateGenericInitrav(tr, tr->start);

          printBothOpen("\n\nStarting final GAMMA-based thorough Optimization on tree %d likelihood %f .... \n\n", newBest, tr->likelihoods[newBest]);

          Thorough = 1;
          tr->doCutoff = FALSE;
          treeOptimizeThorough(tr, 1, 10);
          evaluateGenericInitrav(tr, tr->start);

          printBothOpen("Final GAMMA-based Score of best tree %f\n\n", tr->likelihood);

          strcpy(bestTreeFileName, workdir);
          strcat(bestTreeFileName, "RAxML_bestTree.");
          strcat(bestTreeFileName, run_id);

          Tree2String(tr->tree_string, tr, tr->start->back, TRUE, TRUE, FALSE, FALSE, TRUE, adef, SUMMARIZE_LH, FALSE, FALSE, FALSE, FALSE);

          f = myfopen(bestTreeFileName, "wb");
          fprintf(f, "%s", tr->tree_string);
          fclose(f);

          if(adef->perGeneBranchLengths)
            printTreePerGene(tr, adef, bestTreeFileName, "w");
#ifdef _WAYNE_MPI
        }
#endif
    }

  if(adef->rellBootstrap)
    {
      /* WARNING the functions below need to be invoked after all other trees have been printed
         don't move this part of the code further up! */

#ifdef _WAYNE_MPI
      FILE
        *rellFile = myfopen(rellBootstrapFileNamePID, "wb");
#else
      FILE
        *rellFile = myfopen(rellBootstrapFileName, "wb");
#endif

      for(i = 0; i < NUM_RELL_BOOTSTRAPS; i++)
        {
          restoreTreeList(tr->rellTrees, tr, i);
          Tree2String(tr->tree_string, tr, tr->start->back, FALSE, TRUE, FALSE, FALSE, TRUE, adef, SUMMARIZE_LH, FALSE, FALSE, FALSE, FALSE);
          fprintf(rellFile, "%s", tr->tree_string);
        }

      freeTreeList(tr->rellTrees);
      rax_free(tr->rellTrees);
      rax_free(tr->resample);

      fclose(rellFile);

#ifdef _WAYNE_MPI
      MPI_Barrier(MPI_COMM_WORLD);

      concatenateBSFiles(processes, rellBootstrapFileName);
      removeBSFiles(processes, rellBootstrapFileName);

      MPI_Barrier(MPI_COMM_WORLD);

      if(processID == 0)
        printBothOpen("\nRELL bootstraps written to file %s\n", rellBootstrapFileName);
#else
      printBothOpen("\nRELL bootstraps written to file %s\n", rellBootstrapFileName);
#endif
    }

#ifdef _WAYNE_MPI
  if(processID == bestProcess)
    {
#endif
      overallTime = gettime() - masterTime;
      printBothOpen("Program execution info written to %s\n", infoFileName);

      if(!tr->catOnly)
        {
          printBothOpen("Best-scoring ML tree written to: %s\n\n", bestTreeFileName);

          if(adef->perGeneBranchLengths && tr->NumberOfModels > 1)
            printBothOpen("Per-Partition branch lengths of best-scoring ML tree written to %s.PARTITION.0 to %s.PARTITION.%d\n\n", bestTreeFileName, bestTreeFileName, tr->NumberOfModels - 1);
        }

      printBothOpen("Overall execution time: %f secs or %f hours or %f days\n\n", overallTime, overallTime/3600.0, overallTime/86400.0);
#ifdef _WAYNE_MPI
    }
#endif

  if(!tr->catOnly)
    {
      freeTL(rl);
      rax_free(rl);
    }

#ifdef _WAYNE_MPI
  MPI_Finalize();
#endif

  exit(0);
}
void doBootstrap(tree *tr, analdef *adef, rawdata *rdta, cruncheddata *cdta) { int bootstrapsPerformed, i, n, treeVectorLength = -1; unsigned int vLength = 0; #ifdef _WAYNE_MPI int j, bootStopTests = 1; #endif double loopTime, pearsonAverage; hashtable *h = (hashtable*)NULL; unsigned int **bitVectors = (unsigned int **)NULL; boolean bootStopIt = FALSE; n = adef->multipleRuns; #ifdef _WAYNE_MPI if(n % processes != 0) n = processes * ((n / processes) + 1); adef->multipleRuns = n; #endif if(adef->bootStopping) { h = initHashTable(tr->mxtips * 100); bitVectors = initBitVector(tr, &vLength); treeVectorLength = adef->multipleRuns; } #ifdef _WAYNE_MPI long parsimonySeed0 = adef->parsimonySeed; long replicateSeed0 = adef->rapidBoot; long bootstrapSeed0 = adef->boot; n = n / processes; #endif for(i = 0; i < n && !bootStopIt; i++) { loopTime = gettime(); #ifdef _WAYNE_MPI if(i == 0) { if(parsimonySeed0 != 0) adef->parsimonySeed = parsimonySeed0 + 10000 * processID; adef->rapidBoot = replicateSeed0 + 10000 * processID; adef->boot = bootstrapSeed0 + 10000 * processID; } j = i + n*processID; singleBootstrap(tr, j, adef, rdta, cdta); #else singleBootstrap(tr, i, adef, rdta, cdta); #endif loopTime = gettime() - loopTime; writeInfoFile(adef, tr, loopTime); if(adef->bootStopping) #ifdef _WAYNE_MPI { int nn = (i + 1) * processes; if((nn > START_BSTOP_TEST) && (i * processes < FC_SPACING * bootStopTests) && ((i + 1) * processes >= FC_SPACING * bootStopTests) ) { MPI_Barrier(MPI_COMM_WORLD); concatenateBSFiles(processes, bootstrapFileName); MPI_Barrier(MPI_COMM_WORLD); bootStopIt = computeBootStopMPI(tr, bootstrapFileName, adef, &pearsonAverage); bootStopTests++; } } #else bootStopIt = bootStop(tr, h, i, &pearsonAverage, bitVectors, treeVectorLength, vLength, adef); #endif } #ifdef _WAYNE_MPI MPI_Barrier(MPI_COMM_WORLD); bootstrapsPerformed = i * processes; if(processID == 0) { if(!adef->bootStopping) concatenateBSFiles(processes, bootstrapFileName); removeBSFiles(processes, 
bootstrapFileName); } MPI_Barrier(MPI_COMM_WORLD); #else bootstrapsPerformed = i; #endif adef->multipleRuns = bootstrapsPerformed; if(adef->bootStopping) { freeBitVectors(bitVectors, 2 * tr->mxtips); rax_free(bitVectors); freeHashTable(h); rax_free(h); if(bootStopIt) { switch(tr->bootStopCriterion) { case FREQUENCY_STOP: printBothOpenMPI("Stopped Standard BS search after %d replicates with FC Bootstopping criterion\n", bootstrapsPerformed); printBothOpenMPI("Pearson Average of %d random splits: %f\n",BOOTSTOP_PERMUTATIONS , pearsonAverage); break; case MR_STOP: printBothOpenMPI("Stopped Standard BS search after %d replicates with MR-based Bootstopping criterion\n", bootstrapsPerformed); printBothOpenMPI("WRF Average of %d random splits: %f\n", BOOTSTOP_PERMUTATIONS, pearsonAverage); break; case MRE_STOP: printBothOpenMPI("Stopped Standard BS search after %d replicates with MRE-based Bootstopping criterion\n", bootstrapsPerformed); printBothOpenMPI("WRF Average of %d random splits: %f\n", BOOTSTOP_PERMUTATIONS, pearsonAverage); break; case MRE_IGN_STOP: printBothOpenMPI("Stopped Standard BS search after %d replicates with MRE_IGN-based Bootstopping criterion\n", bootstrapsPerformed); printBothOpenMPI("WRF Average of %d random splits: %f\n", BOOTSTOP_PERMUTATIONS, pearsonAverage); break; default: assert(0); } } else { switch(tr->bootStopCriterion) { case FREQUENCY_STOP: printBothOpenMPI("Standard BS search did not converge after %d replicates with FC Bootstopping criterion\n", bootstrapsPerformed); printBothOpenMPI("Pearson Average of %d random splits: %f\n",BOOTSTOP_PERMUTATIONS , pearsonAverage); break; case MR_STOP: printBothOpenMPI("Standard BS search did not converge after %d replicates with MR-based Bootstopping criterion\n", bootstrapsPerformed); printBothOpenMPI("WRF Average of %d random splits: %f\n", BOOTSTOP_PERMUTATIONS, pearsonAverage); break; case MRE_STOP: printBothOpenMPI("Standard BS search did not converge after %d replicates with MRE-based 
Bootstopping criterion\n", bootstrapsPerformed); printBothOpenMPI("WRF Average of %d random splits: %f\n", BOOTSTOP_PERMUTATIONS, pearsonAverage); break; case MRE_IGN_STOP: printBothOpenMPI("Standard BS search did not converge after %d replicates with MR_IGN-based Bootstopping criterion\n", bootstrapsPerformed); printBothOpenMPI("WRF Average of %d random splits: %f\n", BOOTSTOP_PERMUTATIONS, pearsonAverage); break; default: assert(0); } } } }