Esempio n. 1
0
/* TreeTimeJeff: reads a multidivtime output file, converts the posterior node
   ages it reports into branch lengths on the master tree, and prints the tree.

   Usage: TreeTimeJeff <MultidivtimeOutputFile>   (default: o.multidivtime)

   Steps:
     1. find the "Root node number of master tree is" line; com.ns = root/2+1
     2. skip to the first '(' and read the tree (Jeff's node numbers are read
        into nodes[].branch)
     3. read the "Actual time node" lines into nodes[].age
     4. set every non-root branch to (father's age - node's age)
*/
void main (int argc, char*argv[])
{
   int  lline=32000, i,j, ch, jeffnode, inode;
   char line[32000], mcmcf[96]="o.multidivtime", *p;
   FILE *fmcmc;
   double t;

   puts("Usage:\n\tTreeTimeJeff <MultidivtimeOutputFile>\n");
   if(argc>1) {
      /* refuse names that do not fit instead of overflowing mcmcf[] */
      if(strlen(argv[1]) >= sizeof(mcmcf)) error2("file name too long");
      strcpy(mcmcf, argv[1]);
   }
   fmcmc=gfopen(mcmcf, "r");

   /* Read root node number */
   for( ; ; ) {
      if(fgets(line, lline, fmcmc) == NULL) error2("EOF mcmc file");
      if((p=strstr(line, "Root node number of master tree is"))==NULL) continue;
      /* the key phrase is 34 characters long; %d skips the blank after it */
      if(sscanf(p+34, "%d", &j) != 1) error2("root node number not found");
      com.ns=j/2+1;
      break;
   }
   printf("Tree has %d taxa.\n\n", com.ns);

   /* read tree.  JeffNode read into [].branch */
   for(; ; ) {
      ch=fgetc(fmcmc);
      if(ch==EOF) error2("EOF treefile");
      if(ch=='(')
         { ungetc(ch,fmcmc); break; }
   }
   ReadTreeN(fmcmc, &i, &j, 2, 0);
   OutTreeN(F0,1,0);  FPN(F0);  FPN(F0);

   /* read posterior time estimates */
   for(i=0; i<tree.nnode; i++) nodes[i].age=0;
   for( ; ; ) {
      if(fgets(line, lline, fmcmc) == NULL) error2("EOF mcmc file");
      if((p=strstr(line, "Actual time node"))==NULL) continue;
      /* the key phrase is 16 characters long; skip lines that do not parse */
      if(sscanf(p+16, "%d =%lf", &jeffnode, &t) != 2) continue;
      if(jeffnode<com.ns) {
         if(t>0) {
            /* Tip lines precede the internal-node lines, so no node index has
               been computed yet.  Locate the tip through Jeff's node number
               stored in [].branch, the same field the internal-node check
               below relies on.
               NOTE(review): assumes tips carry Jeff's number in [].branch --
               confirm against the tree format. */
            for(inode=0; inode<com.ns; inode++)
               if(jeffnode-nodes[inode].branch == 0) break;
            if(inode<com.ns)
               nodes[inode].age=t;
            else
               printf(" tip %d not found in tree, age ignored. ", jeffnode);
         }
      }
      else {
         /* internal nodes are numbered in opposite directions by the two programs */
         inode=tree.nnode-1+com.ns-jeffnode;
         printf("JeffNode %3d ZihengNode %3d time %9.6f\n", jeffnode,inode+1,t);
         nodes[inode].age=t;
         if(inode==com.ns) break;   /* last internal node has been read */
         if(jeffnode-nodes[inode].branch != 0)
            printf(" node number error. ");
      }
   }
   /* [].branch held Jeff's node numbers until now; overwrite with lengths */
   for(i=0; i<tree.nnode; i++)
      if(i!=tree.root) nodes[i].branch=nodes[nodes[i].father].age-nodes[i].age;

   FPN(F0);  OutTreeN(F0,1,1);  FPN(F0);
   fclose(fmcmc);
   exit(0);
}
Esempio n. 2
0
int ListTrees (FILE* fout, int ns, int rooted)
{
/* list trees by adding species, works fine with large ns

   fout   : stream that receives the trees, one per line
   ns     : number of species passed to MakeTreeIb()
   rooted : 0 for unrooted trees, 1 for rooted trees
   Returns 0.

   Ib[] is a mixed-radix counter: digit i runs over 0..2*i+2 and each setting
   of the counter is turned into one tree by MakeTreeIb().
   The tree counts are accumulated in double because the number of rooted
   trees for 12 taxa (21!!) does not fit in a 32-bit int.
*/
   int NTrees;
   double nUnrooted=1, nRooted=3;
   int i, Ib[NS-2], ns1=ns+rooted, nM=ns1-3, finish;

   if(com.ns<=12) {
      printf ("%20s%20s%20s\n", "Taxa", "Unrooted trees", "Rooted trees");
      for (i=4; i<=com.ns; i++) {
         nUnrooted *= 2*i-5;
         nRooted *= 2*i-3;
         printf ("%20d%20.0f%20.0f\n", i, nUnrooted, nRooted);
      }
      fprintf (fout, "%10d %10.0f\n", com.ns, (!rooted?nUnrooted:nRooted));
   }

   /* use species numbers as names */
   if(com.ns<=26) {
      for (i=0; i<com.ns; i++)
         sprintf(com.spname[i], "%d", i+1);
   }

   for (i=0;i<nM;i++) Ib[i]=0;
   for (NTrees=0; ; ) {
      MakeTreeIb(ns, Ib, rooted);
      OutTreeN(fout, (com.ns<=26), 0);

      if(rooted) fprintf(fout, " [%7d %6d LHs]\n", NTrees++, CountLHsTree());
      else fprintf(fout, " [%7d]\n", NTrees++);

      /* With no counter digits (3 taxa, unrooted) there is exactly one tree.
         Stop here: incrementing Ib[nM-1] would write outside Ib[] and the
         loop would never terminate. */
      if (nM<=0) break;

      /* advance the counter, carrying into the next more significant digit */
      for (i=nM-1,Ib[nM-1]++,finish=0; i>=0; i--) {
         if (Ib[i]<2*i+3) break;
         if (i==0) {
            finish=1;
            break;
         }
         Ib[i]=0; Ib[i-1]++;
      }
      if (finish) break;
   }
   FPN(fout);

   return (0);
}
Esempio n. 3
0
/* evolver driver.

   Command line:
      evolver                                   interactive menu
      evolver 5|6|7 <MCcontrolFile>             simulate bases/codons/amino acids
      evolver 9 treefile [mastertreefile [pick1tree]]   clade support values

   Results of menu options 1-4 & 8 are written to evolver.out.
   Returns 0; several paths terminate through exit() instead.
*/
int main (int argc, char*argv[])
{
   char *MCctlf=NULL, outf[512]="evolver.out", treefile[512]="mcmc.txt", mastertreefile[512]="\0";
   int i, option=-1, ntree=1,rooted, BD=0, gotoption=0, pick1tree=-1;
   double birth=-1,death=-1,sample=-1,mut=-1, *space;
   FILE *fout=gfopen(outf,"w");

   printf("EVOLVER in %s\n", pamlVerStr);
   com.alpha=0; com.cleandata=1; com.model=0; com.NSsites=0;

   if(argc>1) {
      gotoption=1;   sscanf(argv[1], "%d", &option);
   }
   if(argc==1)
      printf("Results for options 1-4 & 8 go into %s\n",outf);
   else if(option!=5 && option!=6 && option!=7 && option!=9) {
      puts("Usage: \n\tevolver \n\tevolver option# MyDataFile"); exit(-1); 
   }
   if(option>=5 && option<=7) {
      /* all three simulation options take an optional control file; without
         one MCctlf stays NULL and the default in MCctlf0[] is used below */
      if(argc>2) MCctlf = argv[2];
   }
   else if(option==9) {
      if(argc<3) {
         puts("Usage: \n\tevolver \n\tevolver option# MyDataFile"); exit(-1); 
      }
      /* refuse names that do not fit instead of overflowing the buffers */
      if(strlen(argv[2]) >= sizeof(treefile)) error2("tree file name too long");
      strcpy(treefile, argv[2]);
      if(argc>3) {
         if(strlen(argv[3]) >= sizeof(mastertreefile)) error2("master tree file name too long");
         strcpy(mastertreefile, argv[3]);
      }
      if(argc>4) sscanf(argv[4], "%d", &pick1tree);
   }

#if defined (CodonNSbranches)
   option=6;  com.model=1; 
   MCctlf = (argc==3 ? argv[2] : "MCcodonNSbranches.dat");
   gotoption = 1;
#elif defined (CodonNSsites)
   option=6;  com.NSsites=3; 
   MCctlf = (argc==3 ? argv[2] : "MCcodonNSsites.dat");
   gotoption = 1;
#elif defined (CodonNSbranchsites)
   option=6;  com.model=1; com.NSsites=3; 
   MCctlf = (argc==3 ? argv[2] : "MCcodonNSbranchsites.dat");
   gotoption = 1;
#endif

   if(!gotoption) {
      for(; ;) {
         fflush(fout);
         printf("\n\t(1) Get random UNROOTED trees?\n"); 
         printf("\t(2) Get random ROOTED trees?\n"); 
         printf("\t(3) List all UNROOTED trees?\n");
         printf("\t(4) List all ROOTED trees?\n");
         printf("\t(5) Simulate nucleotide data sets (use %s)?\n",MCctlf0[0]);
         printf("\t(6) Simulate codon data sets      (use %s)?\n",MCctlf0[1]);
         printf("\t(7) Simulate amino acid data sets (use %s)?\n",MCctlf0[2]);
         printf("\t(8) Calculate identical bi-partitions between trees?\n");
         printf("\t(9) Calculate clade support values (evolver 9 treefile mastertreefile <pick1tree>)?\n");
         printf("\t(11) Label clades?\n");
         printf("\t(0) Quit?\n");

         /* unreadable input (EOF, non-numeric text) is never consumed by
            scanf and would redisplay the menu forever; treat it as quit */
         if(scanf("%d", &option) != 1) exit(0);

         if(option==0) exit(0);
         if(option>=5 && option<=7) break;
         if(option<5)  { 
            printf ("No. of species: ");
            if(scanf ("%d", &com.ns) != 1) error2("number of species expected");
         }
         if(com.ns>NS) error2 ("Too many species.  Raise NS.");
         if((space=(double*)malloc(10000*sizeof(double)))==NULL) error2("oom");
         rooted = !(option%2);
         if(option<3) {
            printf("\nnumber of trees & random number seed? ");
            scanf("%d%d", &ntree, &i);
            SetSeed(i, 1);
            printf ("Want branch lengths from the birth-death process (0/1)? ");
            scanf ("%d", &BD);
         }
         if(option<=4) {
            if(com.ns<3) error2("no need to do this?");
            i = (com.ns*2-1)*sizeof(struct TREEN);
            if((nodes=(struct TREEN*)malloc(i)) == NULL) 
               error2("oom");
         }
         switch (option) {
         case(1):   /* random UNROOTED trees */
         case(2):   /* random ROOTED trees */
            /* default names */
            if(com.ns<=52)
               for(i=0; i<com.ns; i++)  sprintf(com.spname[i], "%c", (i<26 ? 'A'+i : 'a'+i-26));
            else
               for(i=0; i<com.ns; i++)  sprintf(com.spname[i], "S%d", i+1);

            if(BD) {
               printf ("\nbirth rate, death rate, sampling fraction, and ");
               printf ("mutation rate (tree height)?\n");
               scanf ("%lf%lf%lf%lf", &birth, &death, &sample, &mut);
            }
            for(i=0;i<ntree;i++) {
               RandomLHistory (rooted, space);
               if(BD)
                  BranchLengthBD (1, birth, death, sample, mut);
               if(com.ns<20&&ntree<10) { OutTreeN(F0, 0, BD); puts("\n"); }
               OutTreeN(fout, 1, BD);  FPN(fout);
            }
            /*
            for (i=0; i<com.ns-2-!rooted; i++)
               Ib[i] = (int)((3.+i)*rndu());
            MakeTreeIb (com.ns, Ib, rooted);
            */
            break;
         case(3):
         case(4): 
            ListTrees(fout, com.ns, rooted);
            break;
         case(8):  TreeDistances(fout);  break;
         case(9):  
            printf("tree file names? ");
            /* field widths keep the reads inside the 512-byte buffers */
            scanf("%511s%511s", treefile, mastertreefile);
            break;
         case(10): between_f_and_x();    break;
         case(11): LabelClades(fout);    break;
         default:  exit(0);
         }
         /* the file names are consumed by CladeSupport() after the menu loop,
            so option 9 has to leave the loop just as options 5-7 do */
         if(option==9) break;
      }
   }

   if(option>=5 && option<=7) {
      com.seqtype = option-5;  /* 0, 1, 2 for bases, codons, & amino acids */
      Simulate(MCctlf ? MCctlf : MCctlf0[option-5]);
   }
   else if(option==9) {
      CladeSupport(fout, treefile, mastertreefile, pick1tree);
      /* CladeMrBayesProbabilities("/papers/BPPJC3sB/Karol.trees"); */
   }
   return(0);
}
Esempio n. 4
0
void TreeDistances (FILE* fout)
{
/* I think this is broken after i changed the routine Tree2Partition().

   Counts identical bi-partitions between the trees of a tree file.
   The tree file name and the comparison mode are read from standard input;
   the tree file starts with the number of species and the number of trees.
   Mode 1 compares the first tree with every other tree, mode 2 (the initial
   value of k, kept if the read fails) compares all pairs.
   Results are written to fout, and partly echoed to the screen.

   NOTE(review): parti2B[] is read in the "first vs. others" branch but is not
   assigned anywhere in the code visible here -- confirm where it is filled.
   NOTE(review): the "%s" read into treef[64] has no field width.
*/
   int i,j,ntree, k,*nib, parti2B[NS], nsame, IBsame[NS],nIBsame[NS], lpart=0;
   char treef[64]="5s.all.trees", *partition;
   FILE *ftree;
   double psame, mp, vp;

   /*
   TreeDistanceDistribution(fout);
   */

   puts("\nNumber of identical bi-partitions between trees.\nTree file name?");
   scanf ("%s", treef);

   ftree=gfopen (treef,"r");
   fscanf (ftree, "%d%d", &com.ns, &ntree);
   printf("%2d sequences %2d trees.\n", com.ns, ntree);
   /* room for the 2*ns-1 nodes of one tree */
   i=(com.ns*2-1)*sizeof(struct TREEN);
   if((nodes=(struct TREEN*)malloc(i))==NULL) error2("oom");

   if(ntree<2) error2("ntree");
   printf ("\n%d species, %d trees\n", com.ns, ntree);
   puts("\n\t1: first vs. rest?\n\t2: all pairwise comparisons?\n");
   k=2;
   scanf("%d", &k);

   /* lpart bytes of partition storage per tree: (ns-1)*ns chars.
      Mode 1 keeps two trees at a time, mode 2 keeps all ntree trees. */
   lpart=(com.ns-1)*com.ns*sizeof(char);
   i=(k==1?2:ntree)*lpart;
   printf("\n%d bytes of space requested.\n", i);
   partition=(char*)malloc(i);
   nib=(int*)malloc(ntree*sizeof(int));
   if (partition==NULL || nib==NULL) error2("out of memory");

   if(k==2) {    /* pairwise comparisons */
      fputs("Number of identical bi-partitions in pairwise comparisons\n",fout);
      /* first pass: read every tree, store nbranch-ns in nib[] and its partitions */
      for (i=0; i<ntree; i++) {
         ReadTreeN (ftree, &j, &k, 0, 1); 
         nib[i]=tree.nbranch-com.ns;
         Tree2Partition(partition+i*lpart);
      }
      /* second pass: lower triangle of the comparison matrix, one row per tree,
         written both to the screen and to fout */
      for (i=0; i<ntree; i++,FPN(F0),FPN(fout)) {
         printf("%2d (%2d):", i+1,nib[i]);
         fprintf(fout,"%2d (%2d):", i+1,nib[i]);
         for (j=0; j<i; j++) {
            nsame=NSameBranch(partition+i*lpart,partition+j*lpart, nib[i],nib[j],IBsame);
            printf(" %2d", nsame);
            fprintf(fout," %2d", nsame);
         }
      }
   }
   else {  /* first vs. others */
      /* the first tree goes into slot 0 of partition[] and stays there */
      ReadTreeN (ftree, &j, &k, 0, 1);
      nib[0]=tree.nbranch-com.ns;
      if (nib[0]==0) error2("1st tree is a star tree..");
      Tree2Partition (partition);
      fputs ("Comparing the first tree with the others\nFirst tree:\n",fout);
      OutTreeN(fout,0,0);  FPN(fout);  OutTreeB(fout);  FPN(fout); 
      fputs ("\nInternal branches in the first tree:\n",fout);
      /* one line per partition: the branch's two node numbers, then the
         1-based indices j whose entry partition[i*ns+j] is nonzero */
      FOR(i,nib[0]) { 
         k=parti2B[i];
         fprintf(fout,"%3d (%2d..%-2d): ( ",
            i+1,tree.branches[k][0]+1,tree.branches[k][1]+1);
         FOR(j,com.ns) if(partition[i*com.ns+j]) fprintf(fout,"%d ",j+1);
         fputs(")\n",fout);
      }
      if(nodes[tree.root].nson<=2) 
         fputs("\nRooted tree, results may not be correct.\n",fout);
      fputs("\nCorrect internal branches compared with the 1st tree:\n",fout);
      FOR(k,nib[0]) nIBsame[k]=0;
      /* every further tree is read into slot 1 and compared with slot 0;
         mp and vp are updated incrementally from psame for each tree */
      for (i=1,mp=vp=0; i<ntree; i++,FPN(fout)) {
         ReadTreeN (ftree, &j, &k, 0, 1); 
         nib[1]=tree.nbranch-com.ns;
         Tree2Partition(partition+lpart);
         nsame=NSameBranch (partition,partition+lpart, nib[0],nib[1],IBsame);

         /* proportion of the first tree's nib[0] partitions found in tree i */
         psame=nsame/(double)nib[0];
         FOR(k,nib[0]) nIBsame[k]+=IBsame[k];
         fprintf(fout,"1 vs. %3d: %4d: ", i+1,nsame);
         FOR(k,nib[0]) if(IBsame[k]) fprintf(fout," %2d", k+1);
         printf("1 vs. %5d: %6d/%d  %10.4f\n", i+1,nsame,nib[0],psame);
         vp += square(psame - mp)*(i-1.)/i;
         mp=(mp*(i-1.) + psame)/i;
      }
      /* vp is rescaled and square-rooted; it is printed below as the S.E.
         (set to 0 when there are at most two trees) */
      vp=(ntree<=2 ? 0 : sqrt(vp/((ntree-1-1)*(ntree-1.))));
      fprintf(fout,"\nmean and S.E. of proportion of identical partitions\n");
      fprintf(fout,"between the 1st and all the other %d trees ", ntree-1);
      fprintf(fout,"(ignore these if not revelant):\n %.4f +- %.4f\n", mp, vp);
      fprintf(fout,"\nNumbers of times, out of %d, ", ntree-1);
      fprintf(fout,"interior branches of tree 1 are present");
      fputs("\n(This may be bootstrap support for nodes in tree 1)\n",fout);
      /* per-branch count and percentage over the ntree-1 compared trees */
      FOR(k,nib[0]) { 
         i=tree.branches[parti2B[k]][0]+1;  j=tree.branches[parti2B[k]][1]+1; 
         fprintf(fout,"%3d (%2d..%-2d): %6d (%5.1f%%)\n",
            k+1,i,j,nIBsame[k],nIBsame[k]*100./(ntree-1.));
      }
Esempio n. 5
0
void LabelClades(FILE *fout)
{
/* This reads in a tree and scan species names to check whether they form a 
   paraphyletic group and then label the clade.
   It assumes that the tree is unrooted, and so goes through two rounds to check
   whether the remaining seqs form a monophyletic clade.

   fout: stream that receives the final labelled tree.
   The tree file name, the unrooted flag and the selection strings are read
   from standard input; entering "end" (or end of input) stops the selection.
   The function does not return: it calls exit(0) once the tree is written.

   anc[k] is a bit set over the species for internal node k+com.ns; bit i is
   set when a chosen species i has that node among its ancestors.  The sets
   are unsigned so that setting the highest bit of a word is well defined.
*/
   FILE *ftree;
   int unrooted=1,iclade, sizeclade, mrca, paraphyl, is, imrca, i,j,k, lasts, haslength;
   char key[96]="A", treef[64]="/A/F/flu/HA.all.prankcodon.tre", *p,chosen[NS], *endstr="end";
   unsigned int *anc[NS-1], bitmask;
   int loc, SI=sizeof(int)*8;
   int debug;

   printf("Tree file name? ");
   scanf ("%63s", treef);      /* width keeps the read inside treef[64] */
   printf("Treat tree as unrooted (0 no, 1 yes)? ");
   scanf ("%d", &unrooted);

   ftree = gfopen (treef,"r");
   /* only the species count is used; the second number may be absent */
   if(fscanf (ftree, "%d%d", &com.ns, &j) < 1 || com.ns<=0)
      error2("need ns in tree file");
   /* chosen[] and anc[] are sized by NS */
   if(com.ns>NS) error2("Too many species.  Raise NS.");
   debug = (com.ns<20);

   i = (com.ns*2-1)*sizeof(struct TREEN);
   if((nodes=(struct TREEN*)malloc(i))==NULL) error2("oom");
   for(i=0; i<com.ns*2-1; i++)  nodes[i].nodeStr = NULL;
   for(i=0; i<com.ns-1; i++) {
      anc[i] = (unsigned int*)malloc((com.ns/SI+1)*sizeof(unsigned int));
      if(anc[i]==NULL)  error2("oom");
   }
   ReadTreeN(ftree, &haslength, &j, 1, 0);
   fclose(ftree);
   if(debug) { OutTreeN(F0, 1, PrNodeNum);  FPN(F0); }

   for(iclade=0; iclade<com.ns-1; iclade++) {
      printf("\nString for selecting sequences (followed by non-digit) (end to end)? ");
      /* width keeps the read inside key[96]; a failed read ends the session */
      if(scanf("%95s", key) != 1 || strcmp(endstr, key) == 0)
         break;
      for(i=0; i<com.ns; i++) 
         chosen[i] = '\0';
      mrca = -1;  lasts = -1;

      /* a name is selected when it contains key and the character right
         after the match is not a digit (so "A1" does not select "A12") */
      k = strlen(key);
      for(i=0; i<com.ns; i++) {
         if( (p=strstr(com.spname[i], key)) 
            && !isdigit((unsigned char)p[k]) )
               chosen[i] = 1;
      }

      /* look for MRCA, going through two rounds, assuming unrooted tree */
      for(imrca=0; imrca<1+unrooted; imrca++) {
         if(imrca)   /* 2nd round: work with the complementary set */
            for(i=0; i<com.ns; i++) chosen[i] = 1 - chosen[i]; 

         for(i=0,sizeclade=0; i<com.ns; i++) 
            if(chosen[i]) {
               sizeclade ++;
               lasts = i;
            }

         if(sizeclade <= 1 || sizeclade >= com.ns-1) {
            puts("unable to form a clade.  <2 seqs.");
            break;
         }
         for(i=0; i<com.ns-1; i++) for(j=0; j<com.ns/SI+1; j++) 
            anc[i][j] = 0;
         for(is=0; is<com.ns; is++) {
            if(chosen[is]==0) continue;
            loc = is/SI;  bitmask = 1u << (is%SI);
            for(j=nodes[is].father; j!=-1; j=nodes[j].father) {
               anc[j-com.ns][loc] |= bitmask;
               /* while climbing from the last chosen species, the first
                  ancestor whose set holds all chosen species is the MRCA */
               if(is==lasts) {
                  for(i=0,k=0; i<com.ns; i++)
                     if(anc[j-com.ns][i/SI] & (1u<<(i%SI)))
                        k ++;
                  if(k==sizeclade) {
                     mrca = j;  break;
                  }
               }
            }
         }
         if(imrca==0 && mrca!=tree.root) /* 1st round is enough */
            break;
      }

      if(sizeclade <= 1 || sizeclade >= com.ns-1 || mrca<0 || mrca==tree.root) {
         printf("Unable to label.  Ignored.");
         continue;
      }

      if(debug) 
         for(is=0; is<com.ns-1; is++) {
            printf("\nnode %4d: ", is+com.ns);
            for(j=0; j<com.ns; j++) {
               loc = j/SI;  bitmask = 1u << (j%SI);
               printf(" %d", (anc[is][loc] & bitmask) != 0);
            }
         }

      printf("\nClade #%d (%s): %d seqs selected, MRCA is %d\n", iclade+1, key, sizeclade, mrca+1);
      /* count the unselected species that also descend from the MRCA */
      for(is=0,paraphyl=0; is<com.ns; is++) {
         if(chosen[is] == 0)
            for(j=nodes[is].father; j!=-1; j=nodes[j].father)
               if(j==mrca) { paraphyl++;  break; }
      }
      if(paraphyl) 
         printf("\nThis clade is paraphyletic, & includes %d other sequences\n", paraphyl);

      nodes[mrca].label = iclade+1;
      if(debug) OutTreeN(F0, 1, haslength|PrLabel);
   }

   for(i=0; i<com.ns-1; i++)  free(anc[i]);
   OutTreeN(fout, 1, haslength|PrLabel);  FPN(fout);
   printf("Printed final tree with labels in evolver.out\n");
   exit(0);
}