Beispiel #1
/* Program entry point.  Per the usage string this is the TreeTimeJeff tool:
   it opens a Multidivtime output file (argv[1] if given, otherwise
   "o.multidivtime"), looks for the line reporting the root node number,
   reads the tree that follows, stores the "Actual time node" estimates in
   nodes[].age, converts ages to branch lengths and prints the tree.

   NOTE(review): this listing looks truncated -- the opening brace of the
   body and the closing braces of the loops/ifs are not present, so the
   nesting implied by the comments below is inferred from indentation only.
   NOTE(review): `void main` is not a standard signature; `int main` is the
   portable form.
*/
void main (int argc, char*argv[])
   int  lline=32000, i,j, ch, jeffnode, inode;
   char line[32000], mcmcf[96]="o.multidivtime";
   FILE *fmcmc;
   double t;

   puts("Usage:\n\tTreeTimeJeff <MultidivtimeOutputFile>\n");
   /* NOTE(review): strcpy has no length check; mcmcf holds only 96 bytes. */
   if(argc>1) strcpy(mcmcf, argv[1]);
   fmcmc=gfopen(mcmcf, "r");

   /* Read root node number */
   for( ; ; ) {
	   if(fgets(line, lline, fmcmc) == NULL) error2("EOF mcmc file");
      if(strstr(line, "Root node number of master tree is")==NULL) continue;
      /* The integer is parsed from offset 35 of the line, i.e. this assumes
         the matched phrase starts at the beginning of the line. */
      sscanf(line+35, "%d", &j);
   /* NOTE(review): no visible statement sets com.ns from j or leaves the
      loop above before this printf -- presumably lines are missing; confirm
      against the full source. */
   printf("Tree has %d taxa.\n\n", com.ns);

   /* read tree.  JeffNode read into [].branch */
   for(; ; ) {
      /* NOTE(review): ch is tested here but no read into ch is visible; as
         listed it is uninitialized.  The bare block on the next line pushes
         ch back and stops scanning, presumably under a lost condition. */
	   if(ch==EOF) error2("EOF treefile");
         { ungetc(ch,fmcmc); break; }
   ReadTreeN(fmcmc, &i, &j, 2, 0);
   OutTreeN(F0,1,0);  FPN(F0);  FPN(F0);

   /* read posterior time estimates */
   for(i=0; i<tree.nnode; i++) nodes[i].age=0;   /* start with all ages at 0 */
   for( ; ; ) {
	   if(fgets(line, lline, fmcmc) == NULL) error2("EOF mcmc file");
      if(strstr(line, "Actual time node")==NULL) continue;
      /* Node number and time are parsed from offset 17 of the line. */
      sscanf(line+17, "%d =%lf", &jeffnode, &t);
      if(jeffnode<com.ns) {
         /* NOTE(review): inode is used here and below without a visible
            assignment -- presumably the mapping from jeffnode to inode was
            on a line that is missing. */
         if(t>0) nodes[inode].age=t;
      else {
         printf("JeffNode %3d ZihengNode %3d time %9.6f\n", jeffnode,inode+1,t);
         if(inode==com.ns) break;
         /* nodes[].branch holds the JeffNode number at this point (see the
            "read tree" comment above), so a mismatch is reported. */
         if(jeffnode-nodes[inode].branch != 0)
            printf(" node number error. ");
   /* Replace nodes[].branch by a branch length: the father's age minus the
      node's own age, for every node except the root. */
   for(i=0; i<tree.nnode; i++) 
      if(i!=tree.root) nodes[i].branch=nodes[nodes[i].father].age-nodes[i].age;

   FPN(F0);  OutTreeN(F0,1,1);  FPN(F0);
/* TreeDistanceDistribution: prompts for a tree file, reads its "ns ntree"
   header and the ntree trees, then for every tree i tallies in PD1[] how
   many of the other trees share 0, 1, 2, ... bipartitions with it (the count
   returned by NSameBranch).  PD1 is divided by ntree-1 and written to fout;
   the running mean over the trees processed so far (mPD) is printed to
   stdout.

   fout: stream that receives one line of proportions per tree.

   NOTE(review): the description that follows the signature is not closed
   with a comment terminator before the declarations, so as listed the
   declarations fall inside that comment (it only ends at the end of the
   PD1 line).  The opening brace of the body and the closing braces of the
   loops are absent as well.  Presumably lines were lost in extraction --
   confirm against the full source.
*/
void TreeDistanceDistribution (FILE* fout)
/* This calculates figure 3.7 of Yang (2006).
   This reads the file of all trees (such as 7s.all.trees), and calculates the 
   distribution of partition distance in all pairwise comparisons.
   int i,j,ntree, k,*nib, nsame, IBsame[NS], lpart=0;
   char treef[64]="5s.all.trees", *partition;
   FILE *ftree;
   double mPD[NS], PD1[NS];  /* distribution of partition distances */

   puts("Tree file name?");
   /* NOTE(review): "%s" is unbounded; treef holds only 64 bytes. */
   scanf ("%s", treef);

   ftree=gfopen (treef,"r");
   /* File header: number of sequences, then number of trees.
      NOTE(review): the fscanf result is not checked. */
   fscanf (ftree, "%d%d", &com.ns, &ntree);
   printf("%2d sequences %2d trees.\n", com.ns, ntree);
   /* Room for 2*ns-1 tree nodes. */
   i=(com.ns*2-1)*sizeof(struct TREEN);
   if((nodes=(struct TREEN*)malloc(i))==NULL) error2("oom");

   /* lpart is the number of bytes reserved per tree: (ns-1)*ns chars.
      NOTE(review): ntree*lpart is computed in int and may overflow for
      large inputs. */
   lpart = (com.ns-1)*com.ns*sizeof(char);
   i = ntree*lpart;
   printf("\n%d bytes of space requested.\n", i);
   partition = (char*)malloc(i);
   nib = (int*)malloc(ntree*sizeof(int));
   if (partition==NULL || nib==NULL) error2("out of memory");

   puts("\ntree #: mean prop of tree pairs with 0 1 2 ... shared bipartitions\n");
   fputs("\ntree #: prop of tree pairs with 0 1 2 ... shared bipartitions\n",fout);
   /* NOTE(review): the visible loop body only reads the tree; nothing shown
      here fills nib[i] or the partition slot of tree i, and the closing
      brace is missing -- presumably lost lines. */
   for (i=0; i<ntree; i++) {
      ReadTreeN (ftree, &j, &k, 0, 1); 
   for(k=0; k<com.ns-3; k++) mPD[k]=0;
   for (i=0; i<ntree; i++,FPN(fout)) {
      for(k=0; k<com.ns-3; k++) PD1[k]=0;
      for (j=0; j<ntree; j++) {
         if(j==i) continue;   /* a tree is not compared with itself */
         nsame=NSameBranch(partition+i*lpart,partition+j*lpart, nib[i],nib[j],IBsame);
         /* NOTE(review): nsame indexes PD1[NS] while only entries below
            ns-3 are reset and reported -- assumes NSameBranch stays in that
            range; TODO confirm. */
         PD1[nsame] ++;
      /* Counts -> proportions among the ntree-1 other trees. */
      for(k=0; k<com.ns-3; k++) PD1[k] /= (ntree-1.);
      /* Incremental mean over trees 0..i. */
      for(k=0; k<com.ns-3; k++) mPD[k] = (mPD[k]*i+PD1[k])/(i+1.);
      printf("%8d (%5.1f%%):", i+1,(i+1.)/ntree*100);
      for(k=0; k<com.ns-3; k++) printf(" %7.4f", mPD[k]);
      fprintf(fout, "%8d:", i+1);  for(k=0; k<com.ns-3; k++) fprintf(fout, " %7.4f", PD1[k]);
      /* New line when ns<8 or on every 100th tree; otherwise a carriage
         return so the progress line is overwritten. */
      printf("%s", (com.ns<8||(i+1)%100==0 ? "\n" : "\r"));
   free(partition); free(nodes); free(nib); fclose(ftree);
Beispiel #3
int ListTrees (FILE* fout, int ns, int rooted)
/* list trees by adding species, works fine with large ns
   int NTrees, NTreeRoot=3;
   int i, Ib[NS-2], ns1=ns+rooted, nM=ns1-3, finish;

   if(com.ns<=12) {
      printf ("%20s%20s%20s\n", "Taxa", "Unrooted trees", "Rooted trees");
      for (i=4,NTrees=1; i<=com.ns; i++)  
         printf ("%20d%20d%20d\n", i, (NTrees*=2*i-5), (NTreeRoot*=2*i-3));
      fprintf (fout, "%10d %10d\n", com.ns, (!rooted?NTrees:NTreeRoot));

   if(com.ns<=26) {
      for (i=0; i<com.ns; i++)
         sprintf(com.spname[i], "%d", i+1);

   for (i=0;i<nM;i++) Ib[i]=0;
   for (NTrees=0; ; ) {
      MakeTreeIb(ns, Ib, rooted);
      OutTreeN(fout, (com.ns<=26), 0);

      if(rooted) fprintf(fout, " [%7d %6d LHs]\n", NTrees++, CountLHsTree());
      else fprintf(fout, " [%7d]\n", NTrees++);

      for (i=nM-1,Ib[nM-1]++,finish=0; i>=0; i--) {
         if (Ib[i]<2*i+3) break;
         if (i==0) { 
         Ib[i]=0; Ib[i-1]++; 
      if (finish) break;

   return (0);
/* Entry point of EVOLVER.  With a first argument the option number is parsed
   from argv[1]; when gotoption is still 0 a menu is printed and the option
   is read from stdin.  Options 5-7 are passed on to Simulate(), option 9 to
   CladeSupport(); the other options are dispatched by the switch inside the
   menu loop.  The output file "evolver.out" is opened for writing in the
   declarations.

   NOTE(review): this listing appears truncated -- the opening brace of the
   body, most closing braces, the #endif of the preprocessor chain and
   several case labels are not visible, and Ib is used without a visible
   declaration (it may be file-level).  The comments below describe only
   what the visible lines do.
*/
int main (int argc, char*argv[])
   char *MCctlf=NULL, outf[512]="evolver.out", treefile[512]="mcmc.txt", mastertreefile[512]="\0";
   int i, option=-1, ntree=1,rooted, BD=0, gotoption=0, pick1tree=-1;
   double bfactor=1, birth=-1,death=-1,sample=-1,mut=-1, *space;
   FILE *fout=gfopen(outf,"w");

   printf("EVOLVER in %s\n", pamlVerStr);
   com.alpha=0; com.cleandata=1; com.model=0; com.NSsites=0;

   if(argc>1) {
      /* Command-line mode: the option number comes from argv[1]. */
      gotoption=1;   sscanf(argv[1], "%d", &option);
      printf("Results for options 1-4 & 8 go into %s\n",outf);
   /* NOTE(review): the closing brace of the block above and the condition
      guarding the usage message are not fully visible here. */
   else if(option!=5 && option!=6 && option!=7 && option!=9) {
      puts("Usage: \n\tevolver \n\tevolver option# MyDataFile"); exit(-1); 
   /* NOTE(review): argv[2] is read below without a visible argc check. */
   if(option>=4 && option<=6)
      MCctlf = argv[2];
   else if(option==9) {
      /* NOTE(review): strcpy into the 512-byte buffers is unbounded. */
      strcpy(treefile, argv[2]);
      if(argc>3) strcpy(mastertreefile, argv[3]);
      if(argc>4) sscanf(argv[4], "%d", &pick1tree);

   /* Compile-time presets: each macro forces option 6 with a particular
      model/NSsites setting and a default control-file name, which argv[2]
      overrides when exactly two arguments are given.
      NOTE(review): no #endif is visible for this chain. */
#if defined (CodonNSbranches)
   option=6;  com.model=1; 
   MCctlf = (argc==3 ? argv[2] : "MCcodonNSbranches.dat");
   gotoption = 1;
#elif defined (CodonNSsites)
   option=6;  com.NSsites=3; 
   MCctlf = (argc==3 ? argv[2] : "MCcodonNSsites.dat");
   gotoption = 1;
#elif defined (CodonNSbranchsites)
   option=6;  com.model=1; com.NSsites=3; 
   MCctlf = (argc==3 ? argv[2] : "MCcodonNSbranchsites.dat");
   gotoption = 1;

   if(!gotoption) {
      /* Interactive mode: show the menu until an option ends the loop. */
      for(; ;) {
         printf("\n\t(1) Get random UNROOTED trees?\n"); 
         printf("\t(2) Get random ROOTED trees?\n"); 
         printf("\t(3) List all UNROOTED trees?\n");
         printf("\t(4) List all ROOTED trees?\n");
         printf("\t(5) Simulate nucleotide data sets (use %s)?\n",MCctlf0[0]);
         printf("\t(6) Simulate codon data sets      (use %s)?\n",MCctlf0[1]);
         printf("\t(7) Simulate amino acid data sets (use %s)?\n",MCctlf0[2]);
         printf("\t(8) Calculate identical bi-partitions between trees?\n");
         printf("\t(9) Calculate clade support values (evolver 9 treefile mastertreefile <pick1tree>)?\n");
         printf("\t(11) Label clades?\n");
         printf("\t(0) Quit?\n");

         /* option is preset to 9, so that value is kept if scanf converts
            nothing. */
         option = 9;
         scanf("%d", &option);

         if(option==0) exit(0);
         if(option>=5 && option<=7) break;   /* simulation is handled after the loop */
         if(option<5)  { 
            printf ("No. of species: ");
            scanf ("%d", &com.ns);
         if(com.ns>NS) error2 ("Too many species.  Raise NS.");
         if((space=(double*)malloc(10000*sizeof(double)))==NULL) error2("oom");
         /* Even option numbers (2, 4) are the rooted variants in the menu. */
         rooted = !(option%2);
         if(option<3) {
            printf("\nnumber of trees & random number seed? ");
            scanf("%d%d", &ntree, &i);
            SetSeed(i, 1);
            printf ("Want branch lengths from the birth-death process (0/1)? ");
            scanf ("%d", &BD);
         if(option<=4) {
            if(com.ns<3) error2("no need to do this?");
            i = (com.ns*2-1)*sizeof(struct TREEN);
            /* NOTE(review): the statement executed when this allocation
               fails is not visible. */
            if((nodes=(struct TREEN*)malloc(i)) == NULL) 
         switch (option) {
         case(1):   /* random UNROOTED trees */
         case(2):   /* random ROOTED trees */
            /* default names */
               /* NOTE(review): two naming loops follow back to back; as
                  listed the second overwrites the first.  The indentation
                  suggests an if/else whose condition is missing. */
               for(i=0; i<com.ns; i++)  sprintf(com.spname[i], "%c", (i<26 ? 'A'+i : 'a'+i-26));
               for(i=0; i<com.ns; i++)  sprintf(com.spname[i], "S%d", i+1);

            if(BD) {
               printf ("\nbirth rate, death rate, sampling fraction, and ");
               printf ("mutation rate (tree height)?\n");
               scanf ("%lf%lf%lf%lf", &birth, &death, &sample, &mut);
            for(i=0;i<ntree;i++) {
               RandomLHistory (rooted, space);
                  /* NOTE(review): indentation suggests this call is
                     conditional (on BD?), but no condition is visible. */
                  BranchLengthBD (1, birth, death, sample, mut);
               if(com.ns<20&&ntree<10) { OutTreeN(F0, 0, BD); puts("\n"); }
               OutTreeN(fout, 1, BD);  FPN(fout);
            /* Draw a random index vector: entry i is uniform on 0 .. i+2
               (given rndu() in [0,1) -- TODO confirm), then build the tree
               from it. */
            for (i=0; i<com.ns-2-!rooted; i++)
               Ib[i] = (int)((3.+i)*rndu());
            MakeTreeIb (com.ns, Ib, rooted);
            /* NOTE(review): the case labels for options 3/4 and 9 are not
               visible around the following calls. */
            ListTrees(fout, com.ns, rooted);
         case(8):  TreeDistances(fout);  break;
            printf("tree file names? ");
            scanf("%s%s", treefile, mastertreefile);
         case(10): between_f_and_x();    break;
         case(11): LabelClades(fout);    break;
         default:  exit(0);

   if(option>=5 && option<=7) {
      com.seqtype = option-5;  /* 0, 1, 2 for bases, codons, & amino acids */
      /* Use the control file given on the command line, else the default
         for this data type. */
      Simulate(MCctlf ? MCctlf : MCctlf0[option-5]);
   else if(option==9) {
      CladeSupport(fout, treefile, mastertreefile, pick1tree);
      /* CladeMrBayesProbabilities("/papers/BPPJC3sB/Karol.trees"); */
/* TreeDistances: prompts for a tree file, reads its "ns ntree" header, and
   asks whether to compare the first tree with the rest (1) or to do all
   pairwise comparisons (2).  In both modes the number of identical
   bipartitions returned by NSameBranch is reported; in the first mode a
   mean and standard error of the proportion psame, and per-branch counts
   for the first tree, are written as well.

   fout: stream receiving the results (progress also goes to stdout).

   NOTE(review): the remark that follows the signature is not closed with a
   comment terminator, so as listed everything up to the "pairwise
   comparisons" comment -- declarations, prompts, file reading -- lies
   inside that comment.  Further gaps visible in the listing: no allocation
   of partition or nib precedes the NULL check on them; psame is printed and
   averaged without a visible assignment; two fprintf calls end after the
   format string; the opening brace, most closing braces and the end of the
   function are absent.  Presumably lines were lost in extraction -- confirm
   against the full source.
*/
void TreeDistances (FILE* fout)
/* I think this is broken after i changed the routine Tree2Partition().
   int i,j,ntree, k,*nib, parti2B[NS], nsame, IBsame[NS],nIBsame[NS], lpart=0;
   char treef[64]="5s.all.trees", *partition;
   FILE *ftree;
   double psame, mp, vp;


   puts("\nNumber of identical bi-partitions between trees.\nTree file name?");
   scanf ("%s", treef);

   ftree=gfopen (treef,"r");
   fscanf (ftree, "%d%d", &com.ns, &ntree);
   printf("%2d sequences %2d trees.\n", com.ns, ntree);
   i=(com.ns*2-1)*sizeof(struct TREEN);
   if((nodes=(struct TREEN*)malloc(i))==NULL) error2("oom");

   if(ntree<2) error2("ntree");
   printf ("\n%d species, %d trees\n", com.ns, ntree);
   puts("\n\t1: first vs. rest?\n\t2: all pairwise comparisons?\n");
   scanf("%d", &k);

   printf("\n%d bytes of space requested.\n", i);
   if (partition==NULL || nib==NULL) error2("out of memory");

   if(k==2) {    /* pairwise comparisons */
      fputs("Number of identical bi-partitions in pairwise comparisons\n",fout);
      /* NOTE(review): the visible loop body only reads each tree; nothing
         shown stores its partition or nib[i], and the closing brace is
         missing. */
      for (i=0; i<ntree; i++) {
         ReadTreeN (ftree, &j, &k, 0, 1); 
      /* Lower-triangular table: tree i against every earlier tree j. */
      for (i=0; i<ntree; i++,FPN(F0),FPN(fout)) {
         printf("%2d (%2d):", i+1,nib[i]);
         fprintf(fout,"%2d (%2d):", i+1,nib[i]);
         for (j=0; j<i; j++) {
            nsame=NSameBranch(partition+i*lpart,partition+j*lpart, nib[i],nib[j],IBsame);
            printf(" %2d", nsame);
            fprintf(fout," %2d", nsame);
   else {  /* first vs. others */
      ReadTreeN (ftree, &j, &k, 0, 1);
      /* NOTE(review): nib[0] is tested without a visible assignment. */
      if (nib[0]==0) error2("1st tree is a star tree..");
      Tree2Partition (partition);
      fputs ("Comparing the first tree with the others\nFirst tree:\n",fout);
      OutTreeN(fout,0,0);  FPN(fout);  OutTreeB(fout);  FPN(fout); 
      fputs ("\nInternal branches in the first tree:\n",fout);
      FOR(i,nib[0]) { 
         /* NOTE(review): the argument list of this fprintf is missing. */
         fprintf(fout,"%3d (%2d..%-2d): ( ",
         /* List the 1-based species whose entry in row i is nonzero. */
         FOR(j,com.ns) if(partition[i*com.ns+j]) fprintf(fout,"%d ",j+1);
         /* NOTE(review): the condition guarding this warning is not
            visible. */
         fputs("\nRooted tree, results may not be correct.\n",fout);
      fputs("\nCorrect internal branches compared with the 1st tree:\n",fout);
      FOR(k,nib[0]) nIBsame[k]=0;
      for (i=1,mp=vp=0; i<ntree; i++,FPN(fout)) {
         ReadTreeN (ftree, &j, &k, 0, 1); 
         /* Tree 1 is compared with the data at partition+lpart / nib[1].
            NOTE(review): no visible code refreshes that slot for the tree
            just read. */
         nsame=NSameBranch (partition,partition+lpart, nib[0],nib[1],IBsame);

         /* Per-branch tally of how often each branch of tree 1 recurs. */
         FOR(k,nib[0]) nIBsame[k]+=IBsame[k];
         fprintf(fout,"1 vs. %3d: %4d: ", i+1,nsame);
         FOR(k,nib[0]) if(IBsame[k]) fprintf(fout," %2d", k+1);
         printf("1 vs. %5d: %6d/%d  %10.4f\n", i+1,nsame,nib[0],psame);
         /* Incremental update: vp accumulates the squared deviation from
            the mean so far, then the mean mp absorbs the new value. */
         vp += square(psame - mp)*(i-1.)/i;
         mp=(mp*(i-1.) + psame)/i;
      /* Standard error over the ntree-1 comparisons; 0 when there is only
         one comparison. */
      vp=(ntree<=2 ? 0 : sqrt(vp/((ntree-1-1)*(ntree-1.))));
      fprintf(fout,"\nmean and S.E. of proportion of identical partitions\n");
      fprintf(fout,"between the 1st and all the other %d trees ", ntree-1);
      fprintf(fout,"(ignore these if not revelant):\n %.4f +- %.4f\n", mp, vp);
      fprintf(fout,"\nNumbers of times, out of %d, ", ntree-1);
      fprintf(fout,"interior branches of tree 1 are present");
      fputs("\n(This may be bootstrap support for nodes in tree 1)\n",fout);
      FOR(k,nib[0]) { 
         /* 1-based end nodes of the branch mapped from partition k.
            NOTE(review): parti2B is read without a visible assignment, and
            the fprintf below has lost its argument list. */
         i=tree.branches[parti2B[k]][0]+1;  j=tree.branches[parti2B[k]][1]+1; 
         fprintf(fout,"%3d (%2d..%-2d): %6d (%5.1f%%)\n",
/* LabelClades: reads one tree from a user-named file, then repeatedly asks
   for a key string, selects the species whose names contain it, finds the
   most recent common ancestor (MRCA) of the selection with per-node bit
   sets, and stores the clade number in nodes[mrca].label.  The labelled
   tree is finally written to fout.

   fout: stream that receives the final labelled tree.

   NOTE(review): the description that follows the signature is not closed
   with a comment terminator, so as listed everything up to the "look for
   MRCA" comment -- declarations, prompts, tree reading, species selection
   -- lies inside that comment.  Further gaps visible in that part of the
   listing: the test against the "end" key has no visible body; two
   selection loops follow each other, the second without the digit check,
   so as listed it makes the first redundant (presumably they were
   alternatives); the opening brace and most closing braces are absent.
   Presumably lines were lost in extraction -- confirm against the full
   source.
*/
void LabelClades(FILE *fout)
/* This reads in a tree and scan species names to check whether they form a 
   paraphyletic group and then label the clade.
   It assumes that the tree is unrooted, and so goes through two rounds to check
   whether the remaining seqs form a monophyletic clade.
   FILE *ftree;
   int unrooted=1,iclade, sizeclade, mrca, paraphyl, is, imrca, i,j,k, lasts, haslength;
   char key[96]="A", treef[64]="/A/F/flu/HA.all.prankcodon.tre", *p,chosen[NS], *endstr="end";
   int *anc[NS-1], loc, bitmask, SI=sizeof(int)*8;
   int debug;

   printf("Tree file name? ");
   scanf ("%s", treef);
   printf("Treat tree as unrooted (0 no, 1 yes)? ");
   scanf ("%d", &unrooted);

   ftree = gfopen (treef,"r");
   fscanf (ftree, "%d%d", &com.ns, &j);
   if(com.ns<=0) error2("need ns in tree file");
   debug = (com.ns<20);

   i = (com.ns*2-1)*sizeof(struct TREEN);
   if((nodes=(struct TREEN*)malloc(i))==NULL) error2("oom");
   for(i=0; i<com.ns*2-1; i++)  nodes[i].nodeStr = NULL;
   for(i=0; i<com.ns-1; i++) {
      anc[i] = (int*)malloc((com.ns/SI+1)*sizeof(int));
      if(anc[i]==NULL)  error2("oom");
   ReadTreeN(ftree, &haslength, &j, 1, 0);
   if(debug) { OutTreeN(F0, 1, PrNodeNum);  FPN(F0); }

   for(iclade=0; iclade<com.ns-1; iclade++) {
      printf("\nString for selecting sequences (followed by non-digit) (end to end)? ");
      scanf("%s", key);
      if(strcmp(endstr, key) == 0)
      for(i=0; i<com.ns; i++) 
         chosen[i] = '\0';

      k = strlen(key);
      for(i=0; i<com.ns; i++) {
         if( (p=strstr(com.spname[i], key)) 
            && !isdigit(p[k]) )
               chosen[i] = 1;

      for(i=0; i<com.ns; i++) 
         if(strstr(com.spname[i], key)) chosen[i] = 1;

      /* look for MRCA, going through two rounds, assuming unrooted tree */
      for(imrca=0; imrca<1+unrooted; imrca++) {
            /* Complement the selection.  NOTE(review): indentation suggests
               this is conditional (second round only?), but no condition is
               visible. */
            for(i=0; i<com.ns; i++) chosen[i] = 1 - chosen[i]; 

         /* Count the selected species and remember the last one. */
         for(i=0,sizeclade=0; i<com.ns; i++) 
            if(chosen[i]) {
               sizeclade ++;
               lasts = i;

         if(sizeclade <= 1 || sizeclade >= com.ns-1) {
            puts("unable to form a clade.  <2 seqs.");
         /* Clear all ancestor bit sets (one row per internal node, SI bits
            per int). */
         for(i=0; i<com.ns-1; i++) for(j=0; j<com.ns/SI+1; j++) 
            anc[i][j] = 0;
         for(is=0; is<com.ns; is++) {
            if(chosen[is]==0) continue;
            /* Word and bit of species `is` inside a bit set.
               NOTE(review): when is%SI equals SI-1 the shift moves 1 into
               the sign bit of an int. */
            loc = is/SI;  bitmask = 1 << (is%SI);
            /* Walk from the tip towards the root, marking `is` as a
               descendant of every ancestor; rows are indexed by j-com.ns. */
            for(j=nodes[is].father; j!=-1; j=nodes[j].father) {
               anc[j-com.ns][loc] |= bitmask;
               /* While walking up from the last selected species, the first
                  ancestor whose set holds all sizeclade members is taken as
                  the MRCA. */
               if(is==lasts) {
                  for(i=0,k=0; i<com.ns; i++)
                     if(anc[j-com.ns][i/SI] & (1<<(i%SI)))
                        k ++;
                  if(k==sizeclade) {
                     mrca = j;  break;
         /* NOTE(review): the statement controlled by this test is not
            visible. */
         if(imrca==0 && mrca!=tree.root) /* 1st round is enough */

      if(sizeclade <= 1 || sizeclade >= com.ns-1 || mrca==tree.root) {
         printf("Unable to label.  Ignored.");

         /* Dump every internal node's bit set as a row of 0/1 flags.
            NOTE(review): the condition under which this dump runs is not
            visible. */
         for(is=0; is<com.ns-1; is++) {
            printf("\nnode %4d: ", is+com.ns);
            for(j=0; j<com.ns; j++) {
               loc = j/SI;  bitmask = 1 << (j%SI);
               printf(" %d", (anc[is][loc] & bitmask) != 0);

      printf("\nClade #%d (%s): %d seqs selected, MRCA is %d\n", iclade+1, key, sizeclade, mrca+1);
      /* Count the unselected species that nevertheless descend from the
         MRCA. */
      for(is=0,paraphyl=0; is<com.ns; is++) {
         if(chosen[is] == 0)
            for(j=nodes[is].father; j!=-1; j=nodes[j].father)
               if(j==mrca) { paraphyl++;  break; }
         /* NOTE(review): the condition guarding this message is not
            visible. */
         printf("\nThis clade is paraphyletic, & includes %d other sequences\n", paraphyl);

      /* Clade numbers are 1-based. */
      nodes[mrca].label = iclade+1;
      if(debug) OutTreeN(F0, 1, haslength|PrLabel);

   /* NOTE(review): only the anc rows are freed here; nodes and ftree are not
      released in the visible code. */
   for(i=0; i<com.ns-1; i++)  free(anc[i]);
   OutTreeN(fout, 1, haslength|PrLabel);  FPN(fout);
   printf("Printed final tree with labels in evolver.out\n");
Beispiel #7
int main (int argc, char *argv[])
    FILE *ftree, *fout, *fseq;
    char ctlf[32]="pamp.ctl";
    char *Seqstr[]= {"nucleotide", "", "amino-acid", "Binary"};
    int itree, ntree, i, j, s3;
    double *space, *Ft;

#ifdef __MWERKS__
    /* Added by Andrew Rambaut to accommodate Macs -
       Brings up dialog box to allow command line parameters.

    GetOptions (ctlf);
    if(argc>1) {
        strcpy(ctlf, argv[1]);
        printf("\nctlfile set to %s.\n",ctlf);

    if ((fseq=fopen(com.seqf, "r"))==NULL) error2 ("seqfile err.");
    if ((fout=fopen (com.outf, "w"))==NULL) error2("outfile creation err.");
    if((fseq=fopen (com.seqf,"r"))==NULL)  error2("No sequence file!");
    ReadSeq (NULL, fseq);
    i=(com.ns*2-1)*sizeof(struct TREEN);
    if((nodes=(struct TREEN*)malloc(i))==NULL) error2("oom");

    fprintf (fout,"PAMP %15s, %s sequences\n", com.seqf, Seqstr[com.seqtype]);
    if (com.nhomo) fprintf (fout, "nonhomogeneous model\n");

    space = (double*)malloc(50000*sizeof(double));  /* *** */
    if (SeqDistance==NULL||ancestor==NULL) error2("oom");

    i = com.ns*(com.ns-1)/2;
    s3 = sizeof(double)*((com.ns*2-2)*(com.ns*2-2 + 4 + i) + i);
    s3 = max2(s3, com.ncode*com.ncode*(2*com.ns-2+1)*(int)sizeof(double));

    Ft = (double*) malloc(s3);
    if (space==NULL || Ft==NULL)  error2 ("oom space");

    Initialize (fout);
    if (com.ngene>1) error2 ("option G not allowed yet");

       PatternLS (fout, Ft, 0., space, &i);
       printf ("\nPairwise estimation of rate matrix done..\n");
    ftree=gfopen (com.treef,"r");
    fscanf (ftree, "%d%d", &i, &ntree);
    if (i!=com.ns) error2 ("ns in the tree file");

    FOR (itree, ntree) {

        printf ("\nTREE # %2d\n", itree+1);
        fprintf (fout,"\nTREE # %2d\n", itree+1);

        if (ReadaTreeN (ftree, &i,&j, 1)) error2 ("err tree..");
        OutaTreeN (F0, 0, 0);
        FPN (F0);
        OutaTreeN (fout, 0, 0);
        FPN (fout);

        for (i=0,maxchange=0; i<NCATCHANGE; i++) NSiteChange[i]=0;

        PathwayMP1 (fout, &maxchange, NSiteChange, Ft, space, 0);
        printf ("\nHartigan reconstruction done..\n");

        fprintf (fout, "\n\n(1) Branch lengths and substitution pattern\n");
        PatternMP (fout, Ft);
        printf ("pattern done..\n");

        fprintf (fout, "\n\n(2) Gamma parameter\n");
        AlphaMP (fout);
        printf ("gamma done..\n");

        fprintf (fout, "\n\n(3) Parsimony reconstructions\n");
        PathwayMP1 (fout, &maxchange, NSiteChange, Ft, space, 1);
        printf ("Yang reconstruction done..\n");