void main (int argc, char*argv[]) { int lline=32000, i,j, ch, jeffnode, inode; char line[32000], mcmcf[96]="o.multidivtime"; FILE *fmcmc; double t; puts("Usage:\n\tTreeTimeJeff <MultidivtimeOutputFile>\n"); if(argc>1) strcpy(mcmcf, argv[1]); fmcmc=gfopen(mcmcf, "r"); /* Read root node number */ for( ; ; ) { if(fgets(line, lline, fmcmc) == NULL) error2("EOF mcmc file"); if(strstr(line, "Root node number of master tree is")==NULL) continue; sscanf(line+35, "%d", &j); com.ns=j/2+1; break; } printf("Tree has %d taxa.\n\n", com.ns); /* read tree. JeffNode read into [].branch */ for(; ; ) { ch=fgetc(fmcmc); if(ch==EOF) error2("EOF treefile"); if(ch=='(') { ungetc(ch,fmcmc); break; } } ReadTreeN(fmcmc, &i, &j, 2, 0); OutTreeN(F0,1,0); FPN(F0); FPN(F0); /* read posterior time estimates */ for(i=0; i<tree.nnode; i++) nodes[i].age=0; for( ; ; ) { if(fgets(line, lline, fmcmc) == NULL) error2("EOF mcmc file"); if(strstr(line, "Actual time node")==NULL) continue; sscanf(line+17, "%d =%lf", &jeffnode, &t); if(jeffnode<com.ns) { if(t>0) nodes[inode].age=t; } else { inode=tree.nnode-1+com.ns-jeffnode; printf("JeffNode %3d ZihengNode %3d time %9.6f\n", jeffnode,inode+1,t); nodes[inode].age=t; if(inode==com.ns) break; if(jeffnode-nodes[inode].branch != 0) printf(" node number error. "); } } for(i=0; i<tree.nnode; i++) if(i!=tree.root) nodes[i].branch=nodes[nodes[i].father].age-nodes[i].age; FPN(F0); OutTreeN(F0,1,1); FPN(F0); fclose(fmcmc); exit(0); }
void TreeDistanceDistribution (FILE* fout) { /* This calculates figure 3.7 of Yang (2006). This reads the file of all trees (such as 7s.all.trees), and calculates the distribution of partition distance in all pairwise comparisons. */ int i,j,ntree, k,*nib, nsame, IBsame[NS], lpart=0; char treef[64]="5s.all.trees", *partition; FILE *ftree; double mPD[NS], PD1[NS]; /* distribution of partition distances */ puts("Tree file name?"); scanf ("%s", treef); ftree=gfopen (treef,"r"); fscanf (ftree, "%d%d", &com.ns, &ntree); printf("%2d sequences %2d trees.\n", com.ns, ntree); i=(com.ns*2-1)*sizeof(struct TREEN); if((nodes=(struct TREEN*)malloc(i))==NULL) error2("oom"); lpart = (com.ns-1)*com.ns*sizeof(char); i = ntree*lpart; printf("\n%d bytes of space requested.\n", i); partition = (char*)malloc(i); nib = (int*)malloc(ntree*sizeof(int)); if (partition==NULL || nib==NULL) error2("out of memory"); puts("\ntree #: mean prop of tree pairs with 0 1 2 ... shared bipartitions\n"); fputs("\ntree #: prop of tree pairs with 0 1 2 ... shared bipartitions\n",fout); for (i=0; i<ntree; i++) { ReadTreeN (ftree, &j, &k, 0, 1); nib[i]=tree.nbranch-com.ns; Tree2Partition(partition+i*lpart); } for(k=0; k<com.ns-3; k++) mPD[k]=0; for (i=0; i<ntree; i++,FPN(fout)) { for(k=0; k<com.ns-3; k++) PD1[k]=0; for (j=0; j<ntree; j++) { if(j==i) continue; nsame=NSameBranch(partition+i*lpart,partition+j*lpart, nib[i],nib[j],IBsame); PD1[nsame] ++; } for(k=0; k<com.ns-3; k++) PD1[k] /= (ntree-1.); for(k=0; k<com.ns-3; k++) mPD[k] = (mPD[k]*i+PD1[k])/(i+1.); printf("%8d (%5.1f%%):", i+1,(i+1.)/ntree*100); for(k=0; k<com.ns-3; k++) printf(" %7.4f", mPD[k]); fprintf(fout, "%8d:", i+1); for(k=0; k<com.ns-3; k++) fprintf(fout, " %7.4f", PD1[k]); printf("%s", (com.ns<8||(i+1)%100==0 ? "\n" : "\r")); } free(partition); free(nodes); free(nib); fclose(ftree); exit(0); }
/* TreeDistances: counts identical bi-partitions between trees in a tree file.
   Prompts for the tree file name and for a mode (1: first tree vs. the rest;
   anything else, default 2: all pairwise comparisons).  The file starts with
   the number of sequences (com.ns) and the number of trees (ntree); fewer
   than 2 trees is an error.
   Pairwise mode: every tree is read and converted with Tree2Partition, then a
   lower-triangular table of NSameBranch counts is printed to F0 and to fout.
   First-vs-rest mode: the first tree must not be a star tree; each later tree
   is compared with it, giving per-tree counts, a running mean and S.E. of the
   proportion of shared partitions, and per-branch totals (nIBsame).
   NOTE(review): parti2B[] is declared but never assigned in the visible code,
   yet it is read to index tree.branches -- consistent with the author's
   remark below that this routine is broken.
   NOTE(review): scanf("%s", treef) has no field width for the 64-byte buffer.
   NOTE(review): the end of this function (closing of the else branch and any
   cleanup) is not visible in this part of the file.
*/
void TreeDistances (FILE* fout) { /* I think this is broken after i changed the routine Tree2Partition(). */ int i,j,ntree, k,*nib, parti2B[NS], nsame, IBsame[NS],nIBsame[NS], lpart=0; char treef[64]="5s.all.trees", *partition; FILE *ftree; double psame, mp, vp; /* TreeDistanceDistribution(fout); */ puts("\nNumber of identical bi-partitions between trees.\nTree file name?"); scanf ("%s", treef); ftree=gfopen (treef,"r"); fscanf (ftree, "%d%d", &com.ns, &ntree); printf("%2d sequences %2d trees.\n", com.ns, ntree); i=(com.ns*2-1)*sizeof(struct TREEN); if((nodes=(struct TREEN*)malloc(i))==NULL) error2("oom"); if(ntree<2) error2("ntree"); printf ("\n%d species, %d trees\n", com.ns, ntree); puts("\n\t1: first vs. rest?\n\t2: all pairwise comparisons?\n"); k=2; scanf("%d", &k); lpart=(com.ns-1)*com.ns*sizeof(char); i=(k==1?2:ntree)*lpart; printf("\n%d bytes of space requested.\n", i); partition=(char*)malloc(i); nib=(int*)malloc(ntree*sizeof(int)); if (partition==NULL || nib==NULL) error2("out of memory"); if(k==2) { /* pairwise comparisons */ fputs("Number of identical bi-partitions in pairwise comparisons\n",fout); for (i=0; i<ntree; i++) { ReadTreeN (ftree, &j, &k, 0, 1); nib[i]=tree.nbranch-com.ns; Tree2Partition(partition+i*lpart); } for (i=0; i<ntree; i++,FPN(F0),FPN(fout)) { printf("%2d (%2d):", i+1,nib[i]); fprintf(fout,"%2d (%2d):", i+1,nib[i]); for (j=0; j<i; j++) { nsame=NSameBranch(partition+i*lpart,partition+j*lpart, nib[i],nib[j],IBsame); printf(" %2d", nsame); fprintf(fout," %2d", nsame); } } } else { /* first vs. 
others */ ReadTreeN (ftree, &j, &k, 0, 1); nib[0]=tree.nbranch-com.ns; if (nib[0]==0) error2("1st tree is a star tree.."); Tree2Partition (partition); fputs ("Comparing the first tree with the others\nFirst tree:\n",fout); OutTreeN(fout,0,0); FPN(fout); OutTreeB(fout); FPN(fout); fputs ("\nInternal branches in the first tree:\n",fout); FOR(i,nib[0]) { k=parti2B[i]; fprintf(fout,"%3d (%2d..%-2d): ( ", i+1,tree.branches[k][0]+1,tree.branches[k][1]+1); FOR(j,com.ns) if(partition[i*com.ns+j]) fprintf(fout,"%d ",j+1); fputs(")\n",fout); } if(nodes[tree.root].nson<=2) fputs("\nRooted tree, results may not be correct.\n",fout); fputs("\nCorrect internal branches compared with the 1st tree:\n",fout); FOR(k,nib[0]) nIBsame[k]=0; for (i=1,mp=vp=0; i<ntree; i++,FPN(fout)) { ReadTreeN (ftree, &j, &k, 0, 1); nib[1]=tree.nbranch-com.ns; Tree2Partition(partition+lpart); nsame=NSameBranch (partition,partition+lpart, nib[0],nib[1],IBsame); psame=nsame/(double)nib[0]; FOR(k,nib[0]) nIBsame[k]+=IBsame[k]; fprintf(fout,"1 vs. %3d: %4d: ", i+1,nsame); FOR(k,nib[0]) if(IBsame[k]) fprintf(fout," %2d", k+1); printf("1 vs. %5d: %6d/%d %10.4f\n", i+1,nsame,nib[0],psame); vp += square(psame - mp)*(i-1.)/i; mp=(mp*(i-1.) + psame)/i; } vp=(ntree<=2 ? 0 : sqrt(vp/((ntree-1-1)*(ntree-1.)))); fprintf(fout,"\nmean and S.E. of proportion of identical partitions\n"); fprintf(fout,"between the 1st and all the other %d trees ", ntree-1); fprintf(fout,"(ignore these if not revelant):\n %.4f +- %.4f\n", mp, vp); fprintf(fout,"\nNumbers of times, out of %d, ", ntree-1); fprintf(fout,"interior branches of tree 1 are present"); fputs("\n(This may be bootstrap support for nodes in tree 1)\n",fout); FOR(k,nib[0]) { i=tree.branches[parti2B[k]][0]+1; j=tree.branches[parti2B[k]][1]+1; fprintf(fout,"%3d (%2d..%-2d): %6d (%5.1f%%)\n", k+1,i,j,nIBsame[k],nIBsame[k]*100./(ntree-1.)); }
void LabelClades(FILE *fout) { /* This reads in a tree and scan species names to check whether they form a paraphyletic group and then label the clade. It assumes that the tree is unrooted, and so goes through two rounds to check whether the remaining seqs form a monophyletic clade. */ FILE *ftree; int unrooted=1,iclade, sizeclade, mrca, paraphyl, is, imrca, i,j,k, lasts, haslength; char key[96]="A", treef[64]="/A/F/flu/HA.all.prankcodon.tre", *p,chosen[NS], *endstr="end"; int *anc[NS-1], loc, bitmask, SI=sizeof(int)*8; int debug; printf("Tree file name? "); scanf ("%s", treef); printf("Treat tree as unrooted (0 no, 1 yes)? "); scanf ("%d", &unrooted); ftree = gfopen (treef,"r"); fscanf (ftree, "%d%d", &com.ns, &j); if(com.ns<=0) error2("need ns in tree file"); debug = (com.ns<20); i = (com.ns*2-1)*sizeof(struct TREEN); if((nodes=(struct TREEN*)malloc(i))==NULL) error2("oom"); for(i=0; i<com.ns*2-1; i++) nodes[i].nodeStr = NULL; for(i=0; i<com.ns-1; i++) { anc[i] = (int*)malloc((com.ns/SI+1)*sizeof(int)); if(anc[i]==NULL) error2("oom"); } ReadTreeN(ftree, &haslength, &j, 1, 0); fclose(ftree); if(debug) { OutTreeN(F0, 1, PrNodeNum); FPN(F0); } for(iclade=0; iclade<com.ns-1; iclade++) { printf("\nString for selecting sequences (followed by non-digit) (end to end)? "); scanf("%s", key); if(strcmp(endstr, key) == 0) break; for(i=0; i<com.ns; i++) chosen[i] = '\0'; k = strlen(key); for(i=0; i<com.ns; i++) { if( (p=strstr(com.spname[i], key)) && !isdigit(p[k]) ) chosen[i] = 1; } /* for(i=0; i<com.ns; i++) if(strstr(com.spname[i], key)) chosen[i] = 1; */ /* look for MRCA, going through two rounds, assuming unrooted tree */ for(imrca=0; imrca<1+unrooted; imrca++) { if(imrca) for(i=0; i<com.ns; i++) chosen[i] = 1 - chosen[i]; for(i=0,sizeclade=0; i<com.ns; i++) if(chosen[i]) { sizeclade ++; lasts = i; } if(sizeclade <= 1 || sizeclade >= com.ns-1) { puts("unable to form a clade. 
<2 seqs."); break; } for(i=0; i<com.ns-1; i++) for(j=0; j<com.ns/SI+1; j++) anc[i][j] = 0; for(is=0; is<com.ns; is++) { if(chosen[is]==0) continue; loc = is/SI; bitmask = 1 << (is%SI); for(j=nodes[is].father; j!=-1; j=nodes[j].father) { anc[j-com.ns][loc] |= bitmask; if(is==lasts) { for(i=0,k=0; i<com.ns; i++) if(anc[j-com.ns][i/SI] & (1<<(i%SI))) k ++; if(k==sizeclade) { mrca = j; break; } } } } if(imrca==0 && mrca!=tree.root) /* 1st round is enough */ break; } if(sizeclade <= 1 || sizeclade >= com.ns-1 || mrca==tree.root) { printf("Unable to label. Ignored."); continue; } if(debug) for(is=0; is<com.ns-1; is++) { printf("\nnode %4d: ", is+com.ns); for(j=0; j<com.ns; j++) { loc = j/SI; bitmask = 1 << (j%SI); printf(" %d", (anc[is][loc] & bitmask) != 0); } } printf("\nClade #%d (%s): %d seqs selected, MRCA is %d\n", iclade+1, key, sizeclade, mrca+1); for(is=0,paraphyl=0; is<com.ns; is++) { if(chosen[is] == 0) for(j=nodes[is].father; j!=-1; j=nodes[j].father) if(j==mrca) { paraphyl++; break; } } if(paraphyl) printf("\nThis clade is paraphyletic, & includes %d other sequences\n", paraphyl); nodes[mrca].label = iclade+1; if(debug) OutTreeN(F0, 1, haslength|PrLabel); } for(i=0; i<com.ns-1; i++) free(anc[i]); OutTreeN(fout, 1, haslength|PrLabel); FPN(fout); printf("Printed final tree with labels in evolver.out\n"); exit(0); }