/*
 * Remove the root node slot p from a tree that was read in rooted form and
 * return a tip from which the remaining tree can be traversed.
 *
 *   tr                 tree being modified (nextnode, rooted and, optionally,
 *                      constraintVector are updated)
 *   p                  node to remove; must be an inner node whose own back
 *                      pointer is NULL
 *   readBranchLengths  if set, the two subtrees hanging off p are joined with
 *                      the per-partition sum of their branch lengths,
 *                      otherwise hookupDefault() is used
 *   readConstraint     if set and tr->grouped is set, the constraintVector
 *                      entry of p is checked and later overwritten with the
 *                      entry of the node that is moved into p's slot
 *
 * Inconsistent input is reported on stdout and trapped via assert(0).
 */
static nodeptr uprootTree (tree *tr, nodeptr p, boolean readBranchLengths, boolean readConstraint)
{
  nodeptr left, right, tip;
  int lastNode, idx;

  /* inner nodes must still carry their own index as node number */
  for (idx = tr->mxtips + 1; idx < 2 * tr->mxtips - 1; idx++)
    assert(idx == tr->nodep[idx]->number);

  if (isTip(p->number, tr->mxtips) || p->back)
    {
      printf("ERROR: Unable to uproot tree.\n");
      printf(" Inappropriate node marked for removal.\n");
      assert(0);
    }

  assert(p->back == (nodeptr)NULL);

  /* one inner node fewer is in use once the root is gone */
  tr->nextnode -= 1;
  assert(tr->nextnode < 2 * tr->mxtips);
  lastNode = tr->nextnode;
  assert(tr->nodep[tr->nextnode]);

  if (lastNode != tr->mxtips + tr->ntips - 1)
    {
      printf("ERROR: Unable to uproot tree. Inconsistent\n");
      printf(" number of tips and nodes for rooted tree.\n");
      assert(0);
    }

  /* the two subtrees attached to p get connected directly to each other */
  left  = p->next->back;
  right = p->next->next->back;

  assert(p->back == (nodeptr)NULL);

  if (!readBranchLengths)
    hookupDefault(left, right, tr->numBranches);
  else
    {
      double joined[NUM_BRANCHES];
      int model;

      for (model = 0; model < tr->numBranches; model++)
        joined[model] = (right->z[model] + left->z[model]);

      hookup(left, right, joined, tr->numBranches);
    }

  if (readConstraint && tr->grouped && tr->constraintVector[p->number] != 0)
    {
      printf("Root node to remove should have top-level grouping of 0\n");
      assert(0);
    }

  assert(!(isTip(right->number, tr->mxtips) && isTip(left->number, tr->mxtips)));
  assert(p->number > tr->mxtips);

  if (tr->ntips <= 2 || p->number == lastNode)
    {
      /* p already is the last inner node (or the tree is tiny): just detach it */
      p->back = p->next->back = p->next->next->back = (nodeptr) NULL;
    }
  else
    {
      /* move the connections of the last inner node into p's slot so that the
         inner nodes in use stay contiguous in tr->nodep */
      nodeptr last  = tr->nodep[lastNode];
      nodeptr lastB = last->next;
      nodeptr lastC = last->next->next;

      if (readConstraint && tr->grouped)
        tr->constraintVector[p->number] = tr->constraintVector[last->number];

      hookup(p,             last->back,  last->z,  tr->numBranches);
      hookup(p->next,       lastB->back, lastB->z, tr->numBranches);
      hookup(p->next->next, lastC->back, lastC->z, tr->numBranches);

      last->back = last->next->back = last->next->next->back = (nodeptr) NULL;
    }

  assert(tr->ntips > 2);

  tip = findAnyTip(tr->nodep[tr->mxtips + 1], tr->mxtips);
  assert(isTip(tip->number, tr->mxtips));

  tr->rooted = FALSE;
  return tip;
}
/*
 * Read one parenthesised tree description from fp into tr.
 *
 *   fp              input stream positioned at or before the opening '('
 *   tr              tree whose node connections are reset and rebuilt
 *   readBranches    forwarded to addElementLen()
 *   readNodeLabels  forwarded to addElementLen(); must be FALSE for rooted input
 *   topologyOnly    selects the initial tr->start; when FALSE both
 *                   readBranches and readNodeLabels are asserted to be FALSE
 *
 * A description with only two top-level subtrees is treated as rooted and is
 * unrooted with uprootTree().  Parse failures are trapped with assert(0).
 *
 * Returns lcount, a counter initialised to 0 here and passed by address to
 * every addElementLen() call.
 */
int treeReadLen (FILE *fp, tree *tr, boolean readBranches, boolean readNodeLabels, boolean topologyOnly)
{
  nodeptr p;
  int i, ch, lcount = 0;

  /* detach all tips */
  for (i = 1; i <= tr->mxtips; i++)
    tr->nodep[i]->back = (node *) NULL;

  /* detach all inner nodes and restore their node numbers */
  for (i = tr->mxtips + 1; i < 2 * tr->mxtips; i++)
    {
      tr->nodep[i]->back             = (nodeptr)NULL;
      tr->nodep[i]->next->back       = (nodeptr)NULL;
      tr->nodep[i]->next->next->back = (nodeptr)NULL;
      tr->nodep[i]->number             = i;
      tr->nodep[i]->next->number       = i;
      tr->nodep[i]->next->next->number = i;
    }

  if (topologyOnly)
    tr->start = tr->nodep[tr->mxtips];
  else
    tr->start = tr->nodep[1];

  tr->ntips    = 0;
  tr->nextnode = tr->mxtips + 1;

  for (i = 0; i < tr->numBranches; i++)
    tr->partitionSmoothed[i] = FALSE;

  tr->rooted = FALSE;

  p = tr->nodep[(tr->nextnode)++];

  /* Skip ahead to the opening parenthesis.  treeGetCh() can hand back EOF
     (the rooted-format branch below tests for it), so stop on EOF instead of
     spinning forever on a stream that contains no '('. */
  do
    {
      ch = treeGetCh(fp);
      if (ch == EOF)
        {
          printf("ERROR: reached end of tree file without finding an opening parenthesis\n");
          assert(0);
          break;
        }
    }
  while (ch != '(');

  if (!topologyOnly)
    assert(readBranches == FALSE && readNodeLabels == FALSE);

  if (! addElementLen(fp, tr, p, readBranches, readNodeLabels, &lcount))
    assert(0);
  if (! treeNeedCh(fp, ',', "in"))
    assert(0);
  if (! addElementLen(fp, tr, p->next, readBranches, readNodeLabels, &lcount))
    assert(0);

  if (! tr->rooted)
    {
      if ((ch = treeGetCh(fp)) == ',')
        {
          /* third top-level subtree: unrooted format */
          if (! addElementLen(fp, tr, p->next->next, readBranches, readNodeLabels, &lcount))
            assert(0);
        }
      else
        {
          /* A rooted format */
          tr->rooted = TRUE;
          if (ch != EOF)
            (void) ungetc(ch, fp);
        }
    }
  else
    {
      p->next->next->back = (nodeptr) NULL;
    }

  if (! treeNeedCh(fp, ')', "in"))
    assert(0);

  if (topologyOnly)
    assert(!(tr->rooted && readNodeLabels));

  (void) treeFlushLabel(fp);

  if (! treeFlushLen(fp))
    assert(0);

  if (! treeNeedCh(fp, ';', "at end of"))
    assert(0);

  if (tr->rooted)
    {
      assert(!readNodeLabels);

      p->next->next->back = (nodeptr) NULL;
      tr->start = uprootTree(tr, p->next->next, FALSE, FALSE);
      if (! tr->start)
        {
          printf("FATAL ERROR UPROOTING TREE\n");
          assert(0);
        }
    }
  else
    tr->start = findAnyTip(p, tr->mxtips);

  /* this variant only accepts trees that contain every taxon */
  assert(tr->ntips == tr->mxtips);

  return lcount;
}
/*
 * Starting at p, repeatedly step to next->back until a node is reached for
 * which isTip(number, numsp) holds, and return that node.
 */
nodeptr findAnyTip(nodeptr p, int numsp)
{
  nodeptr cur = p;

  while (!isTip(cur->number, numsp))
    cur = cur->next->back;

  return cur;
}
/*
 * Read one parenthesised tree description from fp into tr, accepting more
 * than three subtrees at the top level.
 *
 *   fp    input stream positioned at or before the opening '('
 *   tr    tree whose node connections and constraintVector are reset and rebuilt
 *   adef  analysis settings; forwarded to makeParsimonyTreeIncomplete() and
 *         consulted for rapidBoot
 *
 * Every additional top-level subtree is attached through a fresh inner node
 * that is spliced, chosen at random, into the branch at p->next->next or the
 * branch at p->next.  A description with only two top-level subtrees is
 * treated as rooted and is unrooted with uprootTree().
 *
 * Returns TRUE on success and FALSE when parsing fails.  A top level that is
 * not terminated by ')' after extra subtrees terminates the program.
 */
boolean treeReadLenMULT (FILE *fp, tree *tr, analdef *adef)
{
  nodeptr p, r, s, slot;
  int i, ch, n, rn;
  int partitionCounter = 0;
  double randomResolution;

  srand((unsigned int) time(NULL));

  for (i = 0; i < 2 * tr->mxtips; i++)
    tr->constraintVector[i] = -1;

  /* detach all tips */
  for (i = 1; i <= tr->mxtips; i++)
    tr->nodep[i]->back = (node *) NULL;

  /* detach all inner nodes and restore their node numbers */
  for (i = tr->mxtips + 1; i < 2 * tr->mxtips; i++)
    {
      tr->nodep[i]->back             = (nodeptr)NULL;
      tr->nodep[i]->next->back       = (nodeptr)NULL;
      tr->nodep[i]->next->next->back = (nodeptr)NULL;
      tr->nodep[i]->number             = i;
      tr->nodep[i]->next->number       = i;
      tr->nodep[i]->next->next->number = i;
    }

  tr->start    = tr->nodep[tr->mxtips];
  tr->ntips    = 0;
  tr->nextnode = tr->mxtips + 1;

  for (i = 0; i < tr->numBranches; i++)
    tr->partitionSmoothed[i] = FALSE;

  tr->rooted = FALSE;

  p = tr->nodep[(tr->nextnode)++];

  /* Skip ahead to the opening parenthesis.  treeGetCh() can hand back EOF
     (the rooted-format branch below tests for it), so give up on EOF instead
     of spinning forever on a stream that contains no '('. */
  do
    {
      ch = treeGetCh(fp);
      if (ch == EOF)
        {
          printf("ERROR: reached end of tree file without finding an opening parenthesis\n");
          return FALSE;
        }
    }
  while (ch != '(');

  if (! addElementLenMULT(fp, tr, p, partitionCounter))
    return FALSE;
  if (! treeNeedCh(fp, ',', "in"))
    return FALSE;
  if (! addElementLenMULT(fp, tr, p->next, partitionCounter))
    return FALSE;

  if (! tr->rooted)
    {
      if ((ch = treeGetCh(fp)) == ',')
        {
          if (! addElementLenMULT(fp, tr, p->next->next, partitionCounter))
            return FALSE;

          /* top-level multifurcation: one new inner node per extra subtree */
          while ((ch = treeGetCh(fp)) == ',')
            {
              n = (tr->nextnode)++;
              assert(n <= 2*(tr->mxtips) - 2);

              r = tr->nodep[n];
              tr->constraintVector[r->number] = partitionCounter;

              rn = randomInt(10000);
              randomResolution = ((double)rn) / 10000.0;

              /* pick which of the two branches at p receives the new node */
              slot = (randomResolution < 0.5) ? p->next->next : p->next;

              s = slot->back;
              r->back       = slot;
              slot->back    = r;
              r->next->back = s;
              s->back       = r->next;

              /* propagate a failed subtree parse like every other call here;
                 the result used to be dropped silently */
              if (! addElementLenMULT(fp, tr, r->next->next, partitionCounter))
                return FALSE;
            }

          if (ch != ')')
            {
              printf("Missing /) in treeReadLenMULT\n");
              exit(-1);
            }
          else
            ungetc(ch, fp);
        }
      else
        {
          /* only two top-level subtrees: rooted format */
          tr->rooted = TRUE;
          if (ch != EOF)
            (void) ungetc(ch, fp);
        }
    }
  else
    {
      p->next->next->back = (nodeptr) NULL;
    }

  if (! treeNeedCh(fp, ')', "in"))
    return FALSE;

  (void) treeFlushLabel(fp);

  if (! treeFlushLen(fp, tr))
    return FALSE;

  if (! treeNeedCh(fp, ';', "at end of"))
    return FALSE;

  if (tr->rooted)
    {
      p->next->next->back = (nodeptr) NULL;
      tr->start = uprootTree(tr, p->next->next, FALSE, TRUE);
      if (! tr->start)
        return FALSE;
    }
  else
    {
      tr->start = findAnyTip(p, tr->rdta->numsp);
    }

  /* fewer tips than taxa were read: let the parsimony routine complete the tree */
  if (tr->ntips < tr->mxtips)
    makeParsimonyTreeIncomplete(tr, adef);

  if (!adef->rapidBoot)
    onlyInitrav(tr, tr->start);

  return TRUE;
}
/*
 * Read one parenthesised tree description from fp into tr and, depending on
 * the analysis mode, complete or validate it.
 *
 *   fp                 input stream positioned at or before the opening '('
 *   tr                 tree whose node connections are reset and rebuilt
 *   readBranches       forwarded to addElementLen() and, for rooted input,
 *                      to uprootTree()
 *   readNodeLabels     forwarded to addElementLen(); must be FALSE for rooted input
 *   topologyOnly       when set, support values are initialised (-1 for tips,
 *                      -2 for inner nodes) and the completeness handling at
 *                      the end is skipped unless adef->mode is CLASSIFY_MP
 *   adef               analysis settings (mode, useBinaryModelFile,
 *                      computeDistance are consulted)
 *   completeTree       when set, a tree with fewer than tr->mxtips tips
 *                      terminates the program
 *   storeBranchLabels  forwarded to addElementLen()
 *
 * Parse failures are trapped with assert(0); unusable input for the selected
 * mode terminates the program after printing a message.
 *
 * Returns lcount, a counter initialised to 0 here and passed by address to
 * every addElementLen() call.
 */
int treeReadLen (FILE *fp, tree *tr, boolean readBranches, boolean readNodeLabels, boolean topologyOnly, analdef *adef, boolean completeTree, boolean storeBranchLabels)
{
  nodeptr p;
  int i, ch, lcount = 0;

  tr->branchLabelCounter = 0;

  /* detach all tips */
  for (i = 1; i <= tr->mxtips; i++)
    {
      tr->nodep[i]->back = (node *) NULL;
      if (topologyOnly)
        tr->nodep[i]->support = -1;
    }

  /* detach all inner nodes and restore their node numbers */
  for (i = tr->mxtips + 1; i < 2 * tr->mxtips; i++)
    {
      tr->nodep[i]->back             = (nodeptr)NULL;
      tr->nodep[i]->next->back       = (nodeptr)NULL;
      tr->nodep[i]->next->next->back = (nodeptr)NULL;
      tr->nodep[i]->number             = i;
      tr->nodep[i]->next->number       = i;
      tr->nodep[i]->next->next->number = i;

      if (topologyOnly)
        {
          tr->nodep[i]->support             = -2;
          tr->nodep[i]->next->support       = -2;
          tr->nodep[i]->next->next->support = -2;
        }
    }

  if (topologyOnly)
    tr->start = tr->nodep[tr->mxtips];
  else
    tr->start = tr->nodep[1];

  tr->ntips    = 0;
  tr->nextnode = tr->mxtips + 1;

  for (i = 0; i < tr->numBranches; i++)
    tr->partitionSmoothed[i] = FALSE;

  tr->rooted    = FALSE;
  tr->wasRooted = FALSE;

  p = tr->nodep[(tr->nextnode)++];

  /* Skip ahead to the opening parenthesis.  treeGetCh() can hand back EOF
     (the rooted-format branch below tests for it), so bail out on EOF instead
     of spinning forever on a stream that contains no '('. */
  do
    {
      ch = treeGetCh(fp);
      if (ch == EOF)
        {
          printBothOpen("Error: reached end of tree file without finding an opening parenthesis\n");
          exit(-1);
        }
    }
  while (ch != '(');

  /* which flag combinations are legal depends on the analysis mode */
  if (!topologyOnly)
    {
      if (adef->mode != CLASSIFY_ML)
        {
          if (adef->mode != OPTIMIZE_BR_LEN_SCALER)
            assert(readBranches == FALSE && readNodeLabels == FALSE);
          else
            assert(readBranches == TRUE && readNodeLabels == FALSE);
        }
      else
        {
          if (adef->useBinaryModelFile)
            assert(readBranches == TRUE && readNodeLabels == FALSE);
          else
            assert(readBranches == FALSE && readNodeLabels == FALSE);
        }
    }

  if (! addElementLen(fp, tr, p, readBranches, readNodeLabels, &lcount, adef, storeBranchLabels))
    assert(0);
  if (! treeNeedCh(fp, ',', "in"))
    assert(0);
  if (! addElementLen(fp, tr, p->next, readBranches, readNodeLabels, &lcount, adef, storeBranchLabels))
    assert(0);

  if (! tr->rooted)
    {
      if ((ch = treeGetCh(fp)) == ',')
        {
          /* third top-level subtree: unrooted format */
          if (! addElementLen(fp, tr, p->next->next, readBranches, readNodeLabels, &lcount, adef, storeBranchLabels))
            assert(0);
        }
      else
        {
          /* A rooted format */
          tr->rooted    = TRUE;
          tr->wasRooted = TRUE;
          if (ch != EOF)
            (void) ungetc(ch, fp);
        }
    }
  else
    {
      p->next->next->back = (nodeptr) NULL;
      tr->wasRooted       = TRUE;
    }

  if (!tr->rooted && adef->mode == ANCESTRAL_STATES)
    {
      /* NOTE(review): this error path exits with status 0 - confirm that is intended */
      printf("Error: The ancestral state computation mode requires a rooted tree as input, exiting ....\n");
      exit(0);
    }

  if (! treeNeedCh(fp, ')', "in"))
    assert(0);

  if (topologyOnly)
    assert(!(tr->rooted && readNodeLabels));

  (void) treeFlushLabel(fp);

  if (! treeFlushLen(fp, tr))
    assert(0);

  if (! treeNeedCh(fp, ';', "at end of"))
    assert(0);

  if (tr->rooted)
    {
      assert(!readNodeLabels);

      p->next->next->back = (nodeptr) NULL;
      tr->start = uprootTree(tr, p->next->next, readBranches, FALSE);

      if (! tr->start)
        {
          printf("FATAL ERROR UPROOTING TREE\n");
          assert(0);
        }
    }
  else
    tr->start = findAnyTip(p, tr->rdta->numsp);

  if (!topologyOnly || adef->mode == CLASSIFY_MP)
    {
      assert(tr->ntips <= tr->mxtips);

      if (tr->ntips < tr->mxtips)
        {
          /* the tree that was read does not contain all taxa */
          if (completeTree)
            {
              printBothOpen("Hello this is your friendly RAxML tree parsing routine\n");
              printBothOpen("The RAxML option you are uisng requires to read in only complete trees\n");
              printBothOpen("with %d taxa, there is at least one tree with %d taxa though ... exiting\n", tr->mxtips, tr->ntips);
              exit(-1);
            }
          else
            {
              if (adef->computeDistance)
                {
                  printBothOpen("Error: pairwise distance computation only allows for complete, i.e., containing all taxa\n");
                  printBothOpen("bifurcating starting trees\n");
                  exit(-1);
                }

              if (adef->mode == CLASSIFY_ML || adef->mode == CLASSIFY_MP)
                {
                  printBothOpen("RAxML placement algorithm: You provided a reference tree with %d taxa; alignmnet has %d taxa\n", tr->ntips, tr->mxtips);
                  printBothOpen("%d query taxa will be placed using %s\n", tr->mxtips - tr->ntips, (adef->mode == CLASSIFY_ML)?"maximum likelihood":"parsimony");

                  if (adef->mode == CLASSIFY_ML)
                    classifyML(tr, adef);
                  else
                    {
                      assert(adef->mode == CLASSIFY_MP);
                      classifyMP(tr, adef);
                    }
                }
              else
                {
                  printBothOpen("You provided an incomplete starting tree %d alignmnet has %d taxa\n", tr->ntips, tr->mxtips);
                  makeParsimonyTreeIncomplete(tr, adef);
                }
            }
        }
      else
        {
          /* the tree already contains all taxa: modes that add taxa cannot run */
          if (adef->mode == PARSIMONY_ADDITION)
            {
              printBothOpen("Error you want to add sequences to a trees via MP stepwise addition, but \n");
              printBothOpen("you have provided an input tree that already contains all taxa\n");
              exit(-1);
            }

          if (adef->mode == CLASSIFY_ML || adef->mode == CLASSIFY_MP)
            {
              /* the format has a single %s; a stray int argument used to be
                 passed in front of the string and was consumed by %s */
              printBothOpen("Error you want to place query sequences into a tree using %s, but\n", (adef->mode == CLASSIFY_ML)?"maximum likelihood":"parsimony");
              printBothOpen("you have provided an input tree that already contains all taxa\n");
              exit(-1);
            }
        }

      onlyInitrav(tr, tr->start);
    }

  return lcount;
}
int treeReadLen (FILE *fp, tree *tr, boolean readBranches, boolean readNodeLabels, boolean topologyOnly, analdef *adef, boolean completeTree) { nodeptr p; int i, ch, lcount = 0; for (i = 1; i <= tr->mxtips; i++) { tr->nodep[i]->back = (node *) NULL; if(topologyOnly) tr->nodep[i]->support = -1; } for(i = tr->mxtips + 1; i < 2 * tr->mxtips; i++) { tr->nodep[i]->back = (nodeptr)NULL; tr->nodep[i]->next->back = (nodeptr)NULL; tr->nodep[i]->next->next->back = (nodeptr)NULL; tr->nodep[i]->number = i; tr->nodep[i]->next->number = i; tr->nodep[i]->next->next->number = i; if(topologyOnly) { tr->nodep[i]->support = -2; tr->nodep[i]->next->support = -2; tr->nodep[i]->next->next->support = -2; } } if(topologyOnly) tr->start = tr->nodep[tr->mxtips]; else tr->start = tr->nodep[1]; tr->ntips = 0; tr->nextnode = tr->mxtips + 1; for(i = 0; i < tr->numBranches; i++) tr->partitionSmoothed[i] = FALSE; tr->rooted = FALSE; p = tr->nodep[(tr->nextnode)++]; while((ch = treeGetCh(fp)) != '('); if(!topologyOnly) assert(readBranches == FALSE && readNodeLabels == FALSE); if (! addElementLen(fp, tr, p, readBranches, readNodeLabels, &lcount)) assert(0); if (! treeNeedCh(fp, ',', "in")) assert(0); if (! addElementLen(fp, tr, p->next, readBranches, readNodeLabels, &lcount)) assert(0); if (! tr->rooted) { if ((ch = treeGetCh(fp)) == ',') { if (! addElementLen(fp, tr, p->next->next, readBranches, readNodeLabels, &lcount)) assert(0); } else { /* A rooted format */ tr->rooted = TRUE; if (ch != EOF) (void) ungetc(ch, fp); } } else { p->next->next->back = (nodeptr) NULL; } if (! treeNeedCh(fp, ')', "in")) assert(0); if(topologyOnly) assert(!(tr->rooted && readNodeLabels)); (void) treeFlushLabel(fp); if (! treeFlushLen(fp)) assert(0); if (! treeNeedCh(fp, ';', "at end of")) assert(0); if (tr->rooted) { assert(!readNodeLabels); p->next->next->back = (nodeptr) NULL; tr->start = uprootTree(tr, p->next->next, FALSE, FALSE); if (! 
tr->start) { printf("FATAL ERROR UPROOTING TREE\n"); assert(0); } } else tr->start = findAnyTip(p, tr->rdta->numsp); if(!topologyOnly) { setupPointerMesh(tr); assert(tr->ntips <= tr->mxtips); if(tr->ntips < tr->mxtips) { if(completeTree) { printBothOpen("Hello this is your friendly RAxML tree parsing routine\n"); printBothOpen("The RAxML option you are uisng requires to read in only complete trees\n"); printBothOpen("with %d taxa, there is at least one tree with %d taxa though ... exiting\n", tr->mxtips, tr->ntips); exit(-1); } else { if(adef->computeDistance) { printBothOpen("Error: pairwise distance computation only allows for complete, i.e., containing all taxa\n"); printBothOpen("bifurcating starting trees\n"); exit(-1); } if(adef->mode == CLASSIFY_ML) { printBothOpen("RAxML classifier Algo: You provided a reference tree with %d taxa; alignmnet has %d taxa\n", tr->ntips, tr->mxtips); printBothOpen("%d query taxa will be classifed under ML\n", tr->mxtips - tr->ntips); classifyML(tr, adef); } else { printBothOpen("You provided an incomplete starting tree %d alignmnet has %d taxa\n", tr->ntips, tr->mxtips); makeParsimonyTreeIncomplete(tr, adef); } } } else { if(adef->mode == PARSIMONY_ADDITION) { printBothOpen("Error you want to add sequences to a trees via MP stepwise addition, but \n"); printBothOpen("you have provided an input tree that already contains all taxa\n"); exit(-1); } if(adef->mode == CLASSIFY_ML) { printBothOpen("Error you want to classify query sequences into a tree via ML, but \n"); printBothOpen("you have provided an input tree that already contains all taxa\n"); exit(-1); } } onlyInitrav(tr, tr->start); } return lcount; }
/*
 * Site-specific placement bias: every tip is pruned in turn and re-inserted
 * into the branches of the remaining tree via traverseBias(); for each
 * sliding-window position the node distance between the original attachment
 * point and the entry recorded in pd[] is taken, and the per-site averages
 * over all tips are written to
 * <workdir>RAxML_SiteSpecificPlacementBias.<run_id>.
 *
 *   tr    tree to analyse; each tip is re-attached at its original position
 *         with its original branch lengths before the next one is pruned
 *   adef  analysis settings; only slidingWindowSize is read here
 *
 * This function does not return: it calls exit(0) when done and exit(-1)
 * when the window is larger than the alignment or memory cannot be obtained.
 */
void computePlacementBias(tree *tr, analdef *adef)
{
  int
    windowSize = adef->slidingWindowSize,
    k,
    i,
    tips,
    /* number of branches into which we need to insert once a taxon has been removed */
    numTraversalBranches = (2 * (tr->mxtips - 1)) - 3;

  char
    fileName[1024];

  FILE
    *outFile;

  positionData
    *pd = (positionData *)NULL;         /* data for each sliding window starting position */

  double
    *nodeDistances = (double *)NULL,    /* node distance ND for every sliding window position */
    *distances     = (double *)NULL;    /* accumulated average distance for every site */

  strcpy(fileName, workdir);
  strcat(fileName, "RAxML_SiteSpecificPlacementBias.");
  strcat(fileName, run_id);

  outFile = myfopen(fileName, "w");

  printBothOpen("Likelihood of comprehensive tree %f\n\n", tr->likelihood);

  if(windowSize > tr->cdta->endsite)
    {
      printBothOpen("The size of your sliding window is %d while the number of sites in the alignment is %d\n\n", windowSize, tr->cdta->endsite);
      exit(-1);
    }

  if(windowSize >= (int)(0.9 * tr->cdta->endsite))
    printBothOpen("WARNING: your sliding window of size %d is only slightly smaller than you alignment that has %d sites\n\n",  windowSize, tr->cdta->endsite);

  printBothOpen("Sliding window size: %d\n\n", windowSize);

  /* Allocate only after the window size has been validated.  pd gets one
     entry per site: the per-tip reset loop below touches indices
     0 .. endsite - 1, so the former size of endsite - windowSize entries was
     overrun on every tip. */
  pd            = (positionData *)malloc(sizeof(positionData) * (size_t)tr->cdta->endsite);
  nodeDistances = (double *)calloc((size_t)tr->cdta->endsite, sizeof(double));
  distances     = (double *)calloc((size_t)tr->cdta->endsite, sizeof(double));

  if(pd == (positionData *)NULL || nodeDistances == (double *)NULL || distances == (double *)NULL)
    {
      printBothOpen("Error: out of memory in computePlacementBias\n");
      exit(-1);
    }

  /* prune and re-insert one tip at a time into all branches of the remaining tree */
  for(tips = 1; tips <= tr->mxtips; tips++)
    {
      nodeptr
        myStart,
        p  = tr->nodep[tips]->back,     /* this is the node at which we are pruning */
        p1 = p->next->back,
        p2 = p->next->next->back;

      double
        pz[NUM_BRANCHES],
        p1z[NUM_BRANCHES],
        p2z[NUM_BRANCHES];

      int
        branchCounter = 0;

      /* reset array values for this tip */
      for(i = 0; i < tr->cdta->endsite; i++)
        {
          pd[i].lh = unlikely;
          pd[i].p  = (nodeptr)NULL;
        }

      /* store the three branch lengths adjacent to the position at which we prune */
      for(i = 0; i < tr->numBranches; i++)
        {
          p1z[i] = p1->z[i];
          p2z[i] = p2->z[i];
          pz[i]  = p->z[i];
        }

      /* prune the taxon; removeNodeBIG() presumably also optimizes the branch
         between p1 and p2 - TODO confirm */
      removeNodeBIG(tr, p, tr->numBranches);

      printBothOpen("Pruning taxon Number %d [%s]\n", tips, tr->nameList[tips]);

      /* find any tip to start traversing the tree */
      myStart = findAnyTip(p1, tr->mxtips);

      /* insert taxon, compute likelihood and remove taxon again from all branches */
      traverseBias(p, myStart->back, tr, &branchCounter, pd, windowSize);

      assert(branchCounter == numTraversalBranches);

      /* for every sliding window position calc ND to the true/correct position at p */
      for(i = 0; i < tr->cdta->endsite - windowSize; i++)
        nodeDistances[i] = getNodeDistance(p1, pd[i].p, tr->mxtips);

      /* now analyze */
      for(i = 0; i < tr->cdta->endsite; i++)
        {
          double
            d = 0.0;

          int
            s = 0;

          /* Depending on the site position we have windowSize data points
             available, or fewer near the start or the end of the alignment.
             For each site accumulate the node distances of all sliding
             windows that passed over this site. */
          if(i < windowSize)
            {
              for(k = 0; k < i + 1; k++, s++)
                d += nodeDistances[k];
            }
          else
            {
              if(i < tr->cdta->endsite - windowSize)
                {
                  for(k = i - windowSize + 1; k <= i; k++, s++)
                    d += nodeDistances[k];
                }
              else
                {
                  /* NOTE(review): k + 1 reaches index endsite - windowSize,
                     which the fill loop above never writes (it stays at the
                     calloc'ed 0.0) - confirm the intended index range */
                  for(k = i - windowSize; k < (tr->cdta->endsite - windowSize); k++, s++)
                    d += nodeDistances[k + 1];
                }
            }

          /* Divide the accumulated ND by the number of distances available
             for this position and add it to the running sum over all taxa.
             This must accumulate with +=; plain assignment would keep only
             the value of the last taxon. */
          distances[i] += (d / ((double)s));
        }

      /* re-connect taxon to its original position */
      hookup(p->next,       p1,      p1z, tr->numBranches);
      hookup(p->next->next, p2,      p2z, tr->numBranches);
      hookup(p,             p->back, pz,  tr->numBranches);

      /* fix likelihood vectors */
      newviewGeneric(tr, p);
    }

  /* now just compute the average ND over all taxa */
  for(i = 0; i < tr->cdta->endsite; i++)
    {
      double
        avg = distances[i] / ((double)tr->mxtips);

      fprintf(outFile, "%d %f\n", i, avg);
    }

  printBothOpen("\nTime for EPA-based site-specific placement bias calculation: %f\n", gettime() - masterTime);
  printBothOpen("Site-specific placement bias statistics written to file %s\n", fileName);

  fclose(outFile);

  free(pd);
  free(nodeDistances);
  free(distances);

  exit(0);
}