void main (int argc, char*argv[]) { int lline=32000, i,j, ch, jeffnode, inode; char line[32000], mcmcf[96]="o.multidivtime"; FILE *fmcmc; double t; puts("Usage:\n\tTreeTimeJeff <MultidivtimeOutputFile>\n"); if(argc>1) strcpy(mcmcf, argv[1]); fmcmc=gfopen(mcmcf, "r"); /* Read root node number */ for( ; ; ) { if(fgets(line, lline, fmcmc) == NULL) error2("EOF mcmc file"); if(strstr(line, "Root node number of master tree is")==NULL) continue; sscanf(line+35, "%d", &j); com.ns=j/2+1; break; } printf("Tree has %d taxa.\n\n", com.ns); /* read tree. JeffNode read into [].branch */ for(; ; ) { ch=fgetc(fmcmc); if(ch==EOF) error2("EOF treefile"); if(ch=='(') { ungetc(ch,fmcmc); break; } } ReadTreeN(fmcmc, &i, &j, 2, 0); OutTreeN(F0,1,0); FPN(F0); FPN(F0); /* read posterior time estimates */ for(i=0; i<tree.nnode; i++) nodes[i].age=0; for( ; ; ) { if(fgets(line, lline, fmcmc) == NULL) error2("EOF mcmc file"); if(strstr(line, "Actual time node")==NULL) continue; sscanf(line+17, "%d =%lf", &jeffnode, &t); if(jeffnode<com.ns) { if(t>0) nodes[inode].age=t; } else { inode=tree.nnode-1+com.ns-jeffnode; printf("JeffNode %3d ZihengNode %3d time %9.6f\n", jeffnode,inode+1,t); nodes[inode].age=t; if(inode==com.ns) break; if(jeffnode-nodes[inode].branch != 0) printf(" node number error. "); } } for(i=0; i<tree.nnode; i++) if(i!=tree.root) nodes[i].branch=nodes[nodes[i].father].age-nodes[i].age; FPN(F0); OutTreeN(F0,1,1); FPN(F0); fclose(fmcmc); exit(0); }
void TreeDistanceDistribution (FILE* fout) { /* This calculates figure 3.7 of Yang (2006). This reads the file of all trees (such as 7s.all.trees), and calculates the distribution of partition distance in all pairwise comparisons. */ int i,j,ntree, k,*nib, nsame, IBsame[NS], lpart=0; char treef[64]="5s.all.trees", *partition; FILE *ftree; double mPD[NS], PD1[NS]; /* distribution of partition distances */ puts("Tree file name?"); scanf ("%s", treef); ftree=gfopen (treef,"r"); fscanf (ftree, "%d%d", &com.ns, &ntree); printf("%2d sequences %2d trees.\n", com.ns, ntree); i=(com.ns*2-1)*sizeof(struct TREEN); if((nodes=(struct TREEN*)malloc(i))==NULL) error2("oom"); lpart = (com.ns-1)*com.ns*sizeof(char); i = ntree*lpart; printf("\n%d bytes of space requested.\n", i); partition = (char*)malloc(i); nib = (int*)malloc(ntree*sizeof(int)); if (partition==NULL || nib==NULL) error2("out of memory"); puts("\ntree #: mean prop of tree pairs with 0 1 2 ... shared bipartitions\n"); fputs("\ntree #: prop of tree pairs with 0 1 2 ... shared bipartitions\n",fout); for (i=0; i<ntree; i++) { ReadTreeN (ftree, &j, &k, 0, 1); nib[i]=tree.nbranch-com.ns; Tree2Partition(partition+i*lpart); } for(k=0; k<com.ns-3; k++) mPD[k]=0; for (i=0; i<ntree; i++,FPN(fout)) { for(k=0; k<com.ns-3; k++) PD1[k]=0; for (j=0; j<ntree; j++) { if(j==i) continue; nsame=NSameBranch(partition+i*lpart,partition+j*lpart, nib[i],nib[j],IBsame); PD1[nsame] ++; } for(k=0; k<com.ns-3; k++) PD1[k] /= (ntree-1.); for(k=0; k<com.ns-3; k++) mPD[k] = (mPD[k]*i+PD1[k])/(i+1.); printf("%8d (%5.1f%%):", i+1,(i+1.)/ntree*100); for(k=0; k<com.ns-3; k++) printf(" %7.4f", mPD[k]); fprintf(fout, "%8d:", i+1); for(k=0; k<com.ns-3; k++) fprintf(fout, " %7.4f", PD1[k]); printf("%s", (com.ns<8||(i+1)%100==0 ? "\n" : "\r")); } free(partition); free(nodes); free(nib); fclose(ftree); exit(0); }
int ListTrees (FILE* fout, int ns, int rooted)
{
/* list trees by adding species, works fine with large ns

   fout   : file receiving one tree per line, each followed by its index
            (and, for rooted trees, the value of CountLHsTree()).
   ns     : number of species passed to MakeTreeIb().
   rooted : nonzero to list rooted trees.
   Returns 0.

   Ib[] is used as a mixed-radix counter: digit i runs over 0..2*i+2, and
   every setting of the nM = ns+rooted-3 digits is handed to MakeTreeIb().
   For com.ns<=12 a table of the numbers of unrooted and rooted trees is
   printed first.  The counts are kept in doubles because the number of
   rooted trees for 12 taxa (13,749,310,575) does not fit in a 32-bit int.
*/
   double nUnrooted=1, nRooted=3;
   int itree, i, Ib[NS-2], ns1=ns+rooted, nM=ns1-3, finish;

   if(nM>NS-2) error2("Too many species. Raise NS.");

   if(com.ns<=12) {
      printf ("%20s%20s%20s\n", "Taxa", "Unrooted trees", "Rooted trees");
      for (i=4; i<=com.ns; i++) {
         nUnrooted *= 2*i-5;
         nRooted   *= 2*i-3;
         printf ("%20d%20.0f%20.0f\n", i, nUnrooted, nRooted);
      }
      fprintf (fout, "%10d %10.0f\n", com.ns, (!rooted ? nUnrooted : nRooted));
   }

   if(com.ns<=26) {
      for (i=0; i<com.ns; i++)
         sprintf(com.spname[i], "%d", i+1);
   }

   for (i=0; i<nM; i++) Ib[i] = 0;
   for (itree=0; ; ) {
      MakeTreeIb(ns, Ib, rooted);
      OutTreeN(fout, (com.ns<=26), 0);
      if(rooted) fprintf(fout, " [%7d %6d LHs]\n", itree, CountLHsTree());
      else       fprintf(fout, " [%7d]\n", itree);
      itree++;

      /* With no counter digits there is exactly one tree.  Without this
         test Ib[-1] would be written and the loop would never end. */
      if(nM<=0) break;

      /* advance the counter, carrying into lower-index digits */
      for (i=nM-1,Ib[nM-1]++,finish=0; i>=0; i--) {
         if (Ib[i]<2*i+3) break;
         if (i==0) {
            finish = 1;
            break;
         }
         Ib[i] = 0;
         Ib[i-1]++;
      }
      if (finish) break;
   }
   FPN(fout);

   return (0);
}
int main (int argc, char*argv[]) { char *MCctlf=NULL, outf[512]="evolver.out", treefile[512]="mcmc.txt", mastertreefile[512]="\0"; int i, option=-1, ntree=1,rooted, BD=0, gotoption=0, pick1tree=-1; double bfactor=1, birth=-1,death=-1,sample=-1,mut=-1, *space; FILE *fout=gfopen(outf,"w"); printf("EVOLVER in %s\n", pamlVerStr); com.alpha=0; com.cleandata=1; com.model=0; com.NSsites=0; if(argc>1) { gotoption=1; sscanf(argv[1], "%d", &option); } if(argc==1) printf("Results for options 1-4 & 8 go into %s\n",outf); else if(option!=5 && option!=6 && option!=7 && option!=9) { puts("Usage: \n\tevolver \n\tevolver option# MyDataFile"); exit(-1); } if(option>=4 && option<=6) MCctlf = argv[2]; else if(option==9) { strcpy(treefile, argv[2]); if(argc>3) strcpy(mastertreefile, argv[3]); if(argc>4) sscanf(argv[4], "%d", &pick1tree); } #if defined (CodonNSbranches) option=6; com.model=1; MCctlf = (argc==3 ? argv[2] : "MCcodonNSbranches.dat"); gotoption = 1; #elif defined (CodonNSsites) option=6; com.NSsites=3; MCctlf = (argc==3 ? argv[2] : "MCcodonNSsites.dat"); gotoption = 1; #elif defined (CodonNSbranchsites) option=6; com.model=1; com.NSsites=3; MCctlf = (argc==3 ? 
argv[2] : "MCcodonNSbranchsites.dat"); gotoption = 1; #endif if(!gotoption) { for(; ;) { fflush(fout); printf("\n\t(1) Get random UNROOTED trees?\n"); printf("\t(2) Get random ROOTED trees?\n"); printf("\t(3) List all UNROOTED trees?\n"); printf("\t(4) List all ROOTED trees?\n"); printf("\t(5) Simulate nucleotide data sets (use %s)?\n",MCctlf0[0]); printf("\t(6) Simulate codon data sets (use %s)?\n",MCctlf0[1]); printf("\t(7) Simulate amino acid data sets (use %s)?\n",MCctlf0[2]); printf("\t(8) Calculate identical bi-partitions between trees?\n"); printf("\t(9) Calculate clade support values (evolver 9 treefile mastertreefile <pick1tree>)?\n"); printf("\t(11) Label clades?\n"); printf("\t(0) Quit?\n"); option = 9; scanf("%d", &option); if(option==0) exit(0); if(option>=5 && option<=7) break; if(option<5) { printf ("No. of species: "); scanf ("%d", &com.ns); } if(com.ns>NS) error2 ("Too many species. Raise NS."); if((space=(double*)malloc(10000*sizeof(double)))==NULL) error2("oom"); rooted = !(option%2); if(option<3) { printf("\nnumber of trees & random number seed? "); scanf("%d%d", &ntree, &i); SetSeed(i, 1); printf ("Want branch lengths from the birth-death process (0/1)? "); scanf ("%d", &BD); } if(option<=4) { if(com.ns<3) error2("no need to do this?"); i = (com.ns*2-1)*sizeof(struct TREEN); if((nodes=(struct TREEN*)malloc(i)) == NULL) error2("oom"); } switch (option) { case(1): /* random UNROOTED trees */ case(2): /* random ROOTED trees */ /* default names */ if(com.ns<=52) for(i=0; i<com.ns; i++) sprintf(com.spname[i], "%c", (i<26 ? 
'A'+i : 'a'+i-26)); else for(i=0; i<com.ns; i++) sprintf(com.spname[i], "S%d", i+1); if(BD) { printf ("\nbirth rate, death rate, sampling fraction, and "); printf ("mutation rate (tree height)?\n"); scanf ("%lf%lf%lf%lf", &birth, &death, &sample, &mut); } for(i=0;i<ntree;i++) { RandomLHistory (rooted, space); if(BD) BranchLengthBD (1, birth, death, sample, mut); if(com.ns<20&&ntree<10) { OutTreeN(F0, 0, BD); puts("\n"); } OutTreeN(fout, 1, BD); FPN(fout); } /* for (i=0; i<com.ns-2-!rooted; i++) Ib[i] = (int)((3.+i)*rndu()); MakeTreeIb (com.ns, Ib, rooted); */ break; case(3): case(4): ListTrees(fout, com.ns, rooted); break; case(8): TreeDistances(fout); break; case(9): printf("tree file names? "); scanf("%s%s", treefile, mastertreefile); break; case(10): between_f_and_x(); break; case(11): LabelClades(fout); break; default: exit(0); } } } if(option>=5 && option<=7) { com.seqtype = option-5; /* 0, 1, 2 for bases, codons, & amino acids */ Simulate(MCctlf ? MCctlf : MCctlf0[option-5]); } else if(option==9) { CladeSupport(fout, treefile, mastertreefile, pick1tree); /* CladeMrBayesProbabilities("/papers/BPPJC3sB/Karol.trees"); */ } return(0); }
void TreeDistances (FILE* fout)
{
/* Counts identical bi-partitions between trees read from a tree file and
   writes the results to fout (and partly to the screen).

   The user is prompted for the tree file name and for the comparison mode:
     1: the first tree against each of the others,
     2: all pairwise comparisons (the value used if no number is read).
   The tree file starts with the number of sequences and the number of trees.

   Author's note: I think this is broken after I changed the routine
   Tree2Partition().
   NOTE(review): parti2B[] is declared here but never assigned in this
   function as far as the visible code shows, yet it is read in mode 1 to
   index tree.branches[] -- confirm whether Tree2Partition() used to fill it.
*/
   int i,j,ntree, k,*nib, parti2B[NS], nsame, IBsame[NS],nIBsame[NS], lpart=0;
   char treef[64]="5s.all.trees", *partition;
   FILE *ftree;
   double psame, mp, vp;

   /* TreeDistanceDistribution(fout); */

   puts("\nNumber of identical bi-partitions between trees.\nTree file name?");
   /* NOTE(review): "%s" has no field width; treef holds 64 bytes */
   scanf ("%s", treef);
   ftree=gfopen (treef,"r");
   fscanf (ftree, "%d%d", &com.ns, &ntree);
   printf("%2d sequences %2d trees.\n", com.ns, ntree);
   /* room for 2*ns-1 tree nodes */
   i=(com.ns*2-1)*sizeof(struct TREEN);
   if((nodes=(struct TREEN*)malloc(i))==NULL) error2("oom");

   if(ntree<2) error2("ntree");
   printf ("\n%d species, %d trees\n", com.ns, ntree);
   puts("\n\t1: first vs. rest?\n\t2: all pairwise comparisons?\n");
   k=2;   /* stays 2 if scanf reads nothing */
   scanf("%d", &k);

   /* lpart bytes of partition storage per tree; mode 1 keeps only two trees
      in memory (the first and the current one), mode 2 keeps all of them */
   lpart=(com.ns-1)*com.ns*sizeof(char);
   i=(k==1?2:ntree)*lpart;
   printf("\n%d bytes of space requested.\n", i);
   partition=(char*)malloc(i);
   nib=(int*)malloc(ntree*sizeof(int));
   if (partition==NULL || nib==NULL) error2("out of memory");

   if(k==2) {    /* pairwise comparisons */
      fputs("Number of identical bi-partitions in pairwise comparisons\n",fout);
      /* first pass: read every tree and store its partitions.
         &k is passed to ReadTreeN, so the mode held in k is overwritten from
         here on; nib[i] is presumably the number of internal branches */
      for (i=0; i<ntree; i++) {
         ReadTreeN (ftree, &j, &k, 0, 1);
         nib[i]=tree.nbranch-com.ns;
         Tree2Partition(partition+i*lpart);
      }
      /* second pass: one output row per tree, compared with all earlier trees */
      for (i=0; i<ntree; i++,FPN(F0),FPN(fout)) {
         printf("%2d (%2d):", i+1,nib[i]);
         fprintf(fout,"%2d (%2d):", i+1,nib[i]);
         for (j=0; j<i; j++) {
            nsame=NSameBranch(partition+i*lpart,partition+j*lpart, nib[i],nib[j],IBsame);
            printf(" %2d", nsame);
            fprintf(fout," %2d", nsame);
         }
      }
   }
   else {  /* first vs. others */
      ReadTreeN (ftree, &j, &k, 0, 1);
      nib[0]=tree.nbranch-com.ns;
      if (nib[0]==0) error2("1st tree is a star tree..");
      Tree2Partition (partition);
      fputs ("Comparing the first tree with the others\nFirst tree:\n",fout);
      OutTreeN(fout,0,0);  FPN(fout);
      OutTreeB(fout);  FPN(fout);
      fputs ("\nInternal branches in the first tree:\n",fout);
      FOR(i,nib[0]) {
         k=parti2B[i];   /* NOTE(review): parti2B has no visible assignment */
         fprintf(fout,"%3d (%2d..%-2d): ( ", i+1,tree.branches[k][0]+1,tree.branches[k][1]+1);
         /* list the species flagged in row i of the partition table */
         FOR(j,com.ns) if(partition[i*com.ns+j]) fprintf(fout,"%d ",j+1);
         fputs(")\n",fout);
      }
      if(nodes[tree.root].nson<=2)
         fputs("\nRooted tree, results may not be correct.\n",fout);
      fputs("\nCorrect internal branches compared with the 1st tree:\n",fout);
      /* nIBsame[k] counts in how many other trees branch k of tree 1 is found */
      FOR(k,nib[0]) nIBsame[k]=0;
      for (i=1,mp=vp=0; i<ntree; i++,FPN(fout)) {
         ReadTreeN (ftree, &j, &k, 0, 1);
         nib[1]=tree.nbranch-com.ns;
         /* the current tree always goes into the second slot of partition */
         Tree2Partition(partition+lpart);
         nsame=NSameBranch (partition,partition+lpart, nib[0],nib[1],IBsame);

         psame=nsame/(double)nib[0];
         FOR(k,nib[0]) nIBsame[k]+=IBsame[k];
         fprintf(fout,"1 vs. %3d: %4d: ", i+1,nsame);
         FOR(k,nib[0]) if(IBsame[k]) fprintf(fout," %2d", k+1);
         printf("1 vs. %5d: %6d/%d %10.4f\n", i+1,nsame,nib[0],psame);
         /* incremental update of the sum of squared deviations (vp) and of
            the mean (mp) of psame; vp must be updated before mp */
         vp += square(psame - mp)*(i-1.)/i;
         mp=(mp*(i-1.) + psame)/i;
      }
      /* turn the accumulated sum into the S.E. reported below */
      vp=(ntree<=2 ? 0 : sqrt(vp/((ntree-1-1)*(ntree-1.))));
      fprintf(fout,"\nmean and S.E. of proportion of identical partitions\n");
      fprintf(fout,"between the 1st and all the other %d trees ", ntree-1);
      fprintf(fout,"(ignore these if not revelant):\n %.4f +- %.4f\n", mp, vp);
      fprintf(fout,"\nNumbers of times, out of %d, ", ntree-1);
      fprintf(fout,"interior branches of tree 1 are present");
      fputs("\n(This may be bootstrap support for nodes in tree 1)\n",fout);
      FOR(k,nib[0]) {
         i=tree.branches[parti2B[k]][0]+1;  j=tree.branches[parti2B[k]][1]+1;
         fprintf(fout,"%3d (%2d..%-2d): %6d (%5.1f%%)\n", k+1,i,j,nIBsame[k],nIBsame[k]*100./(ntree-1.));
      }
void LabelClades(FILE *fout) { /* This reads in a tree and scan species names to check whether they form a paraphyletic group and then label the clade. It assumes that the tree is unrooted, and so goes through two rounds to check whether the remaining seqs form a monophyletic clade. */ FILE *ftree; int unrooted=1,iclade, sizeclade, mrca, paraphyl, is, imrca, i,j,k, lasts, haslength; char key[96]="A", treef[64]="/A/F/flu/HA.all.prankcodon.tre", *p,chosen[NS], *endstr="end"; int *anc[NS-1], loc, bitmask, SI=sizeof(int)*8; int debug; printf("Tree file name? "); scanf ("%s", treef); printf("Treat tree as unrooted (0 no, 1 yes)? "); scanf ("%d", &unrooted); ftree = gfopen (treef,"r"); fscanf (ftree, "%d%d", &com.ns, &j); if(com.ns<=0) error2("need ns in tree file"); debug = (com.ns<20); i = (com.ns*2-1)*sizeof(struct TREEN); if((nodes=(struct TREEN*)malloc(i))==NULL) error2("oom"); for(i=0; i<com.ns*2-1; i++) nodes[i].nodeStr = NULL; for(i=0; i<com.ns-1; i++) { anc[i] = (int*)malloc((com.ns/SI+1)*sizeof(int)); if(anc[i]==NULL) error2("oom"); } ReadTreeN(ftree, &haslength, &j, 1, 0); fclose(ftree); if(debug) { OutTreeN(F0, 1, PrNodeNum); FPN(F0); } for(iclade=0; iclade<com.ns-1; iclade++) { printf("\nString for selecting sequences (followed by non-digit) (end to end)? "); scanf("%s", key); if(strcmp(endstr, key) == 0) break; for(i=0; i<com.ns; i++) chosen[i] = '\0'; k = strlen(key); for(i=0; i<com.ns; i++) { if( (p=strstr(com.spname[i], key)) && !isdigit(p[k]) ) chosen[i] = 1; } /* for(i=0; i<com.ns; i++) if(strstr(com.spname[i], key)) chosen[i] = 1; */ /* look for MRCA, going through two rounds, assuming unrooted tree */ for(imrca=0; imrca<1+unrooted; imrca++) { if(imrca) for(i=0; i<com.ns; i++) chosen[i] = 1 - chosen[i]; for(i=0,sizeclade=0; i<com.ns; i++) if(chosen[i]) { sizeclade ++; lasts = i; } if(sizeclade <= 1 || sizeclade >= com.ns-1) { puts("unable to form a clade. 
<2 seqs."); break; } for(i=0; i<com.ns-1; i++) for(j=0; j<com.ns/SI+1; j++) anc[i][j] = 0; for(is=0; is<com.ns; is++) { if(chosen[is]==0) continue; loc = is/SI; bitmask = 1 << (is%SI); for(j=nodes[is].father; j!=-1; j=nodes[j].father) { anc[j-com.ns][loc] |= bitmask; if(is==lasts) { for(i=0,k=0; i<com.ns; i++) if(anc[j-com.ns][i/SI] & (1<<(i%SI))) k ++; if(k==sizeclade) { mrca = j; break; } } } } if(imrca==0 && mrca!=tree.root) /* 1st round is enough */ break; } if(sizeclade <= 1 || sizeclade >= com.ns-1 || mrca==tree.root) { printf("Unable to label. Ignored."); continue; } if(debug) for(is=0; is<com.ns-1; is++) { printf("\nnode %4d: ", is+com.ns); for(j=0; j<com.ns; j++) { loc = j/SI; bitmask = 1 << (j%SI); printf(" %d", (anc[is][loc] & bitmask) != 0); } } printf("\nClade #%d (%s): %d seqs selected, MRCA is %d\n", iclade+1, key, sizeclade, mrca+1); for(is=0,paraphyl=0; is<com.ns; is++) { if(chosen[is] == 0) for(j=nodes[is].father; j!=-1; j=nodes[j].father) if(j==mrca) { paraphyl++; break; } } if(paraphyl) printf("\nThis clade is paraphyletic, & includes %d other sequences\n", paraphyl); nodes[mrca].label = iclade+1; if(debug) OutTreeN(F0, 1, haslength|PrLabel); } for(i=0; i<com.ns-1; i++) free(anc[i]); OutTreeN(fout, 1, haslength|PrLabel); FPN(fout); printf("Printed final tree with labels in evolver.out\n"); exit(0); }
int main (int argc, char *argv[]) { FILE *ftree, *fout, *fseq; char ctlf[32]="pamp.ctl"; char *Seqstr[]= {"nucleotide", "", "amino-acid", "Binary"}; int itree, ntree, i, j, s3; double *space, *Ft; #ifdef __MWERKS__ /* Added by Andrew Rambaut to accommodate Macs - Brings up dialog box to allow command line parameters. */ argc=ccommand(&argv); #endif com.nhomo=1; com.print=1; noisy=2; com.ncatG=8; com.clock=0; com.cleandata=1; GetOptions (ctlf); if(argc>1) { strcpy(ctlf, argv[1]); printf("\nctlfile set to %s.\n",ctlf); } if ((fseq=fopen(com.seqf, "r"))==NULL) error2 ("seqfile err."); if ((fout=fopen (com.outf, "w"))==NULL) error2("outfile creation err."); if((fseq=fopen (com.seqf,"r"))==NULL) error2("No sequence file!"); ReadSeq (NULL, fseq); i=(com.ns*2-1)*sizeof(struct TREEN); if((nodes=(struct TREEN*)malloc(i))==NULL) error2("oom"); fprintf (fout,"PAMP %15s, %s sequences\n", com.seqf, Seqstr[com.seqtype]); if (com.nhomo) fprintf (fout, "nonhomogeneous model\n"); space = (double*)malloc(50000*sizeof(double)); /* *** */ SeqDistance=(double*)malloc(com.ns*(com.ns-1)/2*sizeof(double)); ancestor=(int*)malloc(com.ns*(com.ns-1)/2*sizeof(int)); if (SeqDistance==NULL||ancestor==NULL) error2("oom"); i = com.ns*(com.ns-1)/2; s3 = sizeof(double)*((com.ns*2-2)*(com.ns*2-2 + 4 + i) + i); s3 = max2(s3, com.ncode*com.ncode*(2*com.ns-2+1)*(int)sizeof(double)); Ft = (double*) malloc(s3); if (space==NULL || Ft==NULL) error2 ("oom space"); Initialize (fout); if (com.ngene>1) error2 ("option G not allowed yet"); /* PatternLS (fout, Ft, 0., space, &i); printf ("\nPairwise estimation of rate matrix done..\n"); fflush(fout); */ ftree=gfopen (com.treef,"r"); fscanf (ftree, "%d%d", &i, &ntree); if (i!=com.ns) error2 ("ns in the tree file"); FOR (itree, ntree) { printf ("\nTREE # %2d\n", itree+1); fprintf (fout,"\nTREE # %2d\n", itree+1); if (ReadaTreeN (ftree, &i,&j, 1)) error2 ("err tree.."); OutaTreeN (F0, 0, 0); FPN (F0); OutaTreeN (fout, 0, 0); FPN (fout); for (i=0,maxchange=0; 
i<NCATCHANGE; i++) NSiteChange[i]=0; PathwayMP1 (fout, &maxchange, NSiteChange, Ft, space, 0); printf ("\nHartigan reconstruction done..\n"); fprintf (fout, "\n\n(1) Branch lengths and substitution pattern\n"); PatternMP (fout, Ft); printf ("pattern done..\n"); fflush(fout); fprintf (fout, "\n\n(2) Gamma parameter\n"); AlphaMP (fout); printf ("gamma done..\n"); fflush(fout); fprintf (fout, "\n\n(3) Parsimony reconstructions\n"); PathwayMP1 (fout, &maxchange, NSiteChange, Ft, space, 1); printf ("Yang reconstruction done..\n"); fflush(fout); }