예제 #1
0
/* TreeTimeJeff <MultidivtimeOutputFile>

   Reads a multidivtime output file (default name "o.multidivtime"), extracts
   the root node number, the tree and the "Actual time node" estimates,
   stores the times as node ages, converts ages into branch lengths
   (father's age minus node's age) and prints the tree to F0.

   error2() is used as the fatal-error routine; it is assumed not to return
   (TODO confirm against its definition).
*/
int main (int argc, char*argv[])
{
   const char *rootkey="Root node number of master tree is";
   const char *timekey="Actual time node";
   int  i, j=0, ch, jeffnode, inode;
   char line[32000], mcmcf[96]="o.multidivtime", *p;
   FILE *fmcmc;
   double t;

   puts("Usage:\n\tTreeTimeJeff <MultidivtimeOutputFile>\n");
   if(argc>1) {
      /* refuse names that would overflow the fixed-size buffer */
      if(strlen(argv[1]) >= sizeof(mcmcf)) error2("file name too long");
      strcpy(mcmcf, argv[1]);
   }
   fmcmc=gfopen(mcmcf, "r");

   /* Read root node number.  The number is parsed right after the key text,
      wherever the key occurs in the line, rather than at a fixed column. */
   for( ; ; ) {
      if(fgets(line, (int)sizeof(line), fmcmc) == NULL) error2("EOF mcmc file");
      if((p=strstr(line, rootkey))==NULL) continue;
      if(sscanf(p+strlen(rootkey), "%d", &j) != 1)
         error2("root node number not found");
      com.ns=j/2+1;
      break;
   }
   printf("Tree has %d taxa.\n\n", com.ns);

   /* read tree.  JeffNode read into [].branch */
   for(; ; ) {
      ch=fgetc(fmcmc);
      if(ch==EOF) error2("EOF treefile");
      if(ch=='(')
         { ungetc(ch,fmcmc); break; }
   }
   ReadTreeN(fmcmc, &i, &j, 2, 0);
   OutTreeN(F0,1,0);  FPN(F0);  FPN(F0);

   /* read posterior time estimates */
   for(i=0; i<tree.nnode; i++) nodes[i].age=0;
   for( ; ; ) {
      if(fgets(line, (int)sizeof(line), fmcmc) == NULL) error2("EOF mcmc file");
      if((p=strstr(line, timekey))==NULL) continue;
      /* skip lines that mention the key but do not carry "<node> = <time>" */
      if(sscanf(p+strlen(timekey), "%d =%lf", &jeffnode, &t) != 2) continue;
      if(jeffnode<com.ns) {
         /* Tip.  The original indexed nodes[] with an uninitialized variable
            here.  The tip is now located through the JeffNode number that the
            tree reading step left in [].branch (the same field the interior
            node check below relies on) -- TODO confirm that tips carry it. */
         if(t>0) {
            for(inode=0; inode<com.ns; inode++)
               if(nodes[inode].branch == jeffnode) break;
            if(inode<com.ns) nodes[inode].age=t;
            else printf(" tip %d not found in tree, time ignored. ", jeffnode);
         }
      }
      else {
         inode=tree.nnode-1+com.ns-jeffnode;
         /* jeffnode>=com.ns guarantees inode<=tree.nnode-1; guard the low end */
         if(inode<com.ns) error2("node number out of range");
         printf("JeffNode %3d ZihengNode %3d time %9.6f\n", jeffnode,inode+1,t);
         nodes[inode].age=t;
         if(inode==com.ns) break;   /* last interior node has been read */
         if(jeffnode-nodes[inode].branch != 0)
            printf(" node number error. ");
      }
   }
   /* ages -> branch lengths */
   for(i=0; i<tree.nnode; i++)
      if(i!=tree.root) nodes[i].branch=nodes[nodes[i].father].age-nodes[i].age;

   FPN(F0);  OutTreeN(F0,1,1);  FPN(F0);
   fclose(fmcmc);
   return 0;
}
예제 #2
0
void TreeDistanceDistribution (FILE* fout)
{
/* This calculates figure 3.7 of Yang (2006).
   This reads the file of all trees (such as 7s.all.trees), and calculates the
   distribution of the number of shared bipartitions in all pairwise
   comparisons.

   fout: receives, for every tree, the proportions of the other trees that
         share 0, 1, 2, ... bipartitions with it.  The running mean of those
         proportions over the trees processed so far is printed to stdout.

   The tree file name is read from stdin; the default name is kept if nothing
   can be read.  The routine calls exit(0) when done and never returns.
   error2() is assumed not to return (TODO confirm).
*/
   int i,j,ntree=0, k,*nib, nsame, IBsame[NS];
   size_t lpart, npart;
   char treef[64]="5s.all.trees", *partition;
   FILE *ftree;
   double mPD[NS], PD1[NS];  /* distribution of partition distances */

   puts("Tree file name?");
   scanf ("%63s", treef);     /* width keeps the read inside treef[] */

   ftree=gfopen (treef,"r");
   if(fscanf (ftree, "%d%d", &com.ns, &ntree) != 2)
      error2("expecting ns & ntree at the start of the tree file");
   printf("%2d sequences %2d trees.\n", com.ns, ntree);
   /* mPD, PD1 and IBsame hold NS entries only */
   if(com.ns<2 || com.ns>NS) error2("ns out of range");
   if(ntree<1) error2("ntree");
   if((nodes=(struct TREEN*)malloc((size_t)(com.ns*2-1)*sizeof(struct TREEN)))==NULL)
      error2("oom");

   /* sizes are computed in size_t so that ntree*lpart cannot overflow an int */
   lpart = (size_t)(com.ns-1)*com.ns*sizeof(char);
   if((size_t)ntree > (size_t)-1/lpart) error2("out of memory");
   npart = (size_t)ntree*lpart;
   printf("\n%lu bytes of space requested.\n", (unsigned long)npart);
   partition = (char*)malloc(npart);
   nib = (int*)malloc((size_t)ntree*sizeof(int));
   if (partition==NULL || nib==NULL) error2("out of memory");

   puts("\ntree #: mean prop of tree pairs with 0 1 2 ... shared bipartitions\n");
   fputs("\ntree #: prop of tree pairs with 0 1 2 ... shared bipartitions\n",fout);
   for (i=0; i<ntree; i++) {
      ReadTreeN (ftree, &j, &k, 0, 1);
      nib[i]=tree.nbranch-com.ns;
      Tree2Partition(partition+i*lpart);
   }
   for(k=0; k<com.ns-3; k++) mPD[k]=0;
   for (i=0; i<ntree; i++,FPN(fout)) {
      /* clear every slot nsame may hit, not only the com.ns-3 reported ones,
         so that PD1[nsame]++ never reads an uninitialized value */
      for(k=0; k<com.ns; k++) PD1[k]=0;
      for (j=0; j<ntree; j++) {
         if(j==i) continue;
         nsame=NSameBranch(partition+i*lpart,partition+j*lpart, nib[i],nib[j],IBsame);
         if(nsame>=0 && nsame<com.ns) PD1[nsame] ++;
      }
      for(k=0; k<com.ns-3; k++) PD1[k] /= (ntree-1.);
      for(k=0; k<com.ns-3; k++) mPD[k] = (mPD[k]*i+PD1[k])/(i+1.);
      printf("%8d (%5.1f%%):", i+1,(i+1.)/ntree*100);
      for(k=0; k<com.ns-3; k++) printf(" %7.4f", mPD[k]);
      fprintf(fout, "%8d:", i+1);  for(k=0; k<com.ns-3; k++) fprintf(fout, " %7.4f", PD1[k]);
      /* progress line is overwritten in place, except every 100th tree */
      printf("%s", (com.ns<8||(i+1)%100==0 ? "\n" : "\r"));
   }
   free(partition); free(nodes); free(nib); fclose(ftree);
   exit(0);
}
예제 #3
0
void TreeDistances (FILE* fout)
{
/* Counts identical bi-partitions between the trees in a tree file.

   The tree file name and the comparison mode are read from stdin:
     1: the first tree is compared with each of the others; the mean and S.E.
        of the proportion of identical partitions and the number of times each
        interior branch of the first tree is recovered are written to fout.
     2: all pairwise comparisons; a lower-triangular table of the numbers of
        identical bi-partitions is written to both F0 and fout.

   Original author's note: I think this is broken after i changed the routine
   Tree2Partition().
   NOTE(review): parti2B[] is read in mode 1 but no assignment to it is
   visible before those reads -- probably the breakage referred to above;
   confirm before relying on the branch labels printed from it.
*/
   int i,j,ntree, k,*nib, parti2B[NS], nsame, IBsame[NS],nIBsame[NS], lpart=0;
   char treef[64]="5s.all.trees", *partition;
   FILE *ftree;
   double psame, mp, vp;

   /*
   TreeDistanceDistribution(fout);
   */

   puts("\nNumber of identical bi-partitions between trees.\nTree file name?");
   scanf ("%s", treef);

   /* file header: number of sequences, then number of trees */
   ftree=gfopen (treef,"r");
   fscanf (ftree, "%d%d", &com.ns, &ntree);
   printf("%2d sequences %2d trees.\n", com.ns, ntree);
   i=(com.ns*2-1)*sizeof(struct TREEN);
   if((nodes=(struct TREEN*)malloc(i))==NULL) error2("oom");

   if(ntree<2) error2("ntree");
   printf ("\n%d species, %d trees\n", com.ns, ntree);
   puts("\n\t1: first vs. rest?\n\t2: all pairwise comparisons?\n");
   k=2;   /* default mode, kept if scanf reads nothing */
   scanf("%d", &k);

   /* one partition table of lpart bytes per tree; mode 1 keeps only two tables
      (first tree + current tree), mode 2 keeps one per tree */
   lpart=(com.ns-1)*com.ns*sizeof(char);
   i=(k==1?2:ntree)*lpart;
   printf("\n%d bytes of space requested.\n", i);
   partition=(char*)malloc(i);
   nib=(int*)malloc(ntree*sizeof(int));
   if (partition==NULL || nib==NULL) error2("out of memory");

   if(k==2) {    /* pairwise comparisons */
      fputs("Number of identical bi-partitions in pairwise comparisons\n",fout);
      /* k is reused below as an output argument of ReadTreeN; the mode has
         already been tested at this point */
      for (i=0; i<ntree; i++) {
         ReadTreeN (ftree, &j, &k, 0, 1); 
         nib[i]=tree.nbranch-com.ns;
         Tree2Partition(partition+i*lpart);
      }
      /* row i lists the comparisons of tree i with trees 0..i-1 */
      for (i=0; i<ntree; i++,FPN(F0),FPN(fout)) {
         printf("%2d (%2d):", i+1,nib[i]);
         fprintf(fout,"%2d (%2d):", i+1,nib[i]);
         for (j=0; j<i; j++) {
            nsame=NSameBranch(partition+i*lpart,partition+j*lpart, nib[i],nib[j],IBsame);
            printf(" %2d", nsame);
            fprintf(fout," %2d", nsame);
         }
      }
   }
   else {  /* first vs. others */
      ReadTreeN (ftree, &j, &k, 0, 1);
      nib[0]=tree.nbranch-com.ns;
      if (nib[0]==0) error2("1st tree is a star tree..");
      Tree2Partition (partition);
      fputs ("Comparing the first tree with the others\nFirst tree:\n",fout);
      OutTreeN(fout,0,0);  FPN(fout);  OutTreeB(fout);  FPN(fout); 
      fputs ("\nInternal branches in the first tree:\n",fout);
      FOR(i,nib[0]) { 
         /* NOTE(review): parti2B[i] has not been assigned at this point */
         k=parti2B[i];
         fprintf(fout,"%3d (%2d..%-2d): ( ",
            i+1,tree.branches[k][0]+1,tree.branches[k][1]+1);
         /* list the (1-based) sequences flagged in partition i */
         FOR(j,com.ns) if(partition[i*com.ns+j]) fprintf(fout,"%d ",j+1);
         fputs(")\n",fout);
      }
      if(nodes[tree.root].nson<=2) 
         fputs("\nRooted tree, results may not be correct.\n",fout);
      fputs("\nCorrect internal branches compared with the 1st tree:\n",fout);
      FOR(k,nib[0]) nIBsame[k]=0;
      /* each further tree is read into the second table and compared with the
         first; mp and vp are updated incrementally after every tree */
      for (i=1,mp=vp=0; i<ntree; i++,FPN(fout)) {
         ReadTreeN (ftree, &j, &k, 0, 1); 
         nib[1]=tree.nbranch-com.ns;
         Tree2Partition(partition+lpart);
         nsame=NSameBranch (partition,partition+lpart, nib[0],nib[1],IBsame);

         psame=nsame/(double)nib[0];
         FOR(k,nib[0]) nIBsame[k]+=IBsame[k];
         fprintf(fout,"1 vs. %3d: %4d: ", i+1,nsame);
         FOR(k,nib[0]) if(IBsame[k]) fprintf(fout," %2d", k+1);
         printf("1 vs. %5d: %6d/%d  %10.4f\n", i+1,nsame,nib[0],psame);
         /* running update: vp is updated with the old mean before mp is
            (square(x) presumably returns x*x -- confirm) */
         vp += square(psame - mp)*(i-1.)/i;
         mp=(mp*(i-1.) + psame)/i;
      }
      /* with n=ntree-1 comparisons: sqrt(vp/((n-1)*n)), reported as the S.E. */
      vp=(ntree<=2 ? 0 : sqrt(vp/((ntree-1-1)*(ntree-1.))));
      fprintf(fout,"\nmean and S.E. of proportion of identical partitions\n");
      fprintf(fout,"between the 1st and all the other %d trees ", ntree-1);
      fprintf(fout,"(ignore these if not revelant):\n %.4f +- %.4f\n", mp, vp);
      fprintf(fout,"\nNumbers of times, out of %d, ", ntree-1);
      fprintf(fout,"interior branches of tree 1 are present");
      fputs("\n(This may be bootstrap support for nodes in tree 1)\n",fout);
      FOR(k,nib[0]) { 
         /* NOTE(review): parti2B[k] is read here without a visible assignment;
            tree.branches also refers to the last tree read by this point */
         i=tree.branches[parti2B[k]][0]+1;  j=tree.branches[parti2B[k]][1]+1; 
         fprintf(fout,"%3d (%2d..%-2d): %6d (%5.1f%%)\n",
            k+1,i,j,nIBsame[k],nIBsame[k]*100./(ntree-1.));
      }
예제 #4
0
void LabelClades(FILE *fout)
{
/* This reads in a tree and scans species names to check whether they form a
   paraphyletic group and then labels the clade.
   It assumes that the tree is unrooted, and so goes through two rounds to check
   whether the remaining seqs form a monophyletic clade.

   fout: receives the final labelled tree.

   Interactive input (stdin): tree file name, rooted/unrooted flag, then one
   selection string per clade until "end" (or end of input).  A sequence is
   selected when its name contains the string and the character following the
   match is not a digit.
   anc[m] is a bit set over the sequences, one per interior node com.ns+m,
   marking the selected sequences found below that node.
   The routine calls exit(0) when done and never returns.
   error2() is assumed not to return (TODO confirm).
*/
   FILE *ftree;
   int unrooted=1,iclade, sizeclade, mrca, paraphyl, is, imrca, i,j,k, lasts, haslength;
   char key[96]="A", treef[64]="/A/F/flu/HA.all.prankcodon.tre", *p,chosen[NS], *endstr="end";
   /* unsigned words: setting the top bit of a signed int with 1<<31 is undefined */
   unsigned int *anc[NS-1], bitmask;
   int loc, nword, SI=sizeof(int)*8;
   int debug;

   printf("Tree file name? ");
   scanf ("%63s", treef);      /* width keeps the read inside treef[] */
   printf("Treat tree as unrooted (0 no, 1 yes)? ");
   scanf ("%d", &unrooted);
   /* treat any non-zero answer as yes, so that the search below always runs
      one or two rounds */
   unrooted = (unrooted!=0);

   ftree = gfopen (treef,"r");
   j = 0;
   if(fscanf (ftree, "%d%d", &com.ns, &j) < 1 || com.ns<=0)
      error2("need ns in tree file");
   /* chosen[] and anc[] are sized by NS */
   if(com.ns>NS) error2("too many sequences in tree file");
   debug = (com.ns<20);

   if((nodes=(struct TREEN*)malloc((size_t)(com.ns*2-1)*sizeof(struct TREEN)))==NULL)
      error2("oom");
   for(i=0; i<com.ns*2-1; i++)  nodes[i].nodeStr = NULL;
   nword = com.ns/SI+1;        /* words per bit set */
   for(i=0; i<com.ns-1; i++) {
      anc[i] = (unsigned int*)malloc(nword*sizeof(unsigned int));
      if(anc[i]==NULL)  error2("oom");
   }
   ReadTreeN(ftree, &haslength, &j, 1, 0);
   fclose(ftree);
   if(debug) { OutTreeN(F0, 1, PrNodeNum);  FPN(F0); }

   for(iclade=0; iclade<com.ns-1; iclade++) {
      printf("\nString for selecting sequences (followed by non-digit) (end to end)? ");
      if(scanf("%95s", key) != 1)   /* end of input is handled like "end" */
         break;
      if(strcmp(endstr, key) == 0)
         break;
      for(i=0; i<com.ns; i++)
         chosen[i] = '\0';

      k = (int)strlen(key);
      for(i=0; i<com.ns; i++) {
         p = strstr(com.spname[i], key);
         if(p && !isdigit((unsigned char)p[k]))
            chosen[i] = 1;
      }

      /* look for MRCA, going through two rounds, assuming unrooted tree */
      sizeclade = 0;  lasts = -1;
      for(imrca=0; imrca<1+unrooted; imrca++) {
         if(imrca)   /* 2nd round: try the complementary set of sequences */
            for(i=0; i<com.ns; i++) chosen[i] = 1 - chosen[i];

         for(i=0,sizeclade=0; i<com.ns; i++)
            if(chosen[i]) {
               sizeclade ++;
               lasts = i;
            }

         if(sizeclade <= 1 || sizeclade >= com.ns-1) {
            puts("unable to form a clade.  <2 seqs.");
            break;
         }
         /* the root is the fallback answer if the search finds nothing */
         mrca = tree.root;
         for(i=0; i<com.ns-1; i++) for(j=0; j<nword; j++)
            anc[i][j] = 0;
         for(is=0; is<com.ns; is++) {
            if(chosen[is]==0) continue;
            loc = is/SI;  bitmask = 1u << (is%SI);
            for(j=nodes[is].father; j!=-1; j=nodes[j].father) {
               anc[j-com.ns][loc] |= bitmask;
               /* all selected seqs are marked once the last one is reached:
                  the first ancestor holding all of them is the MRCA */
               if(is==lasts) {
                  for(i=0,k=0; i<com.ns; i++)
                     if(anc[j-com.ns][i/SI] & (1u<<(i%SI)))
                        k ++;
                  if(k==sizeclade) {
                     mrca = j;  break;
                  }
               }
            }
         }
         if(imrca==0 && mrca!=tree.root) /* 1st round is enough */
            break;
      }

      if(sizeclade <= 1 || sizeclade >= com.ns-1 || mrca==tree.root) {
         printf("Unable to label.  Ignored.");
         continue;
      }

      if(debug)
         for(is=0; is<com.ns-1; is++) {
            printf("\nnode %4d: ", is+com.ns);
            for(j=0; j<com.ns; j++) {
               loc = j/SI;  bitmask = 1u << (j%SI);
               printf(" %d", (anc[is][loc] & bitmask) != 0);
            }
         }

      printf("\nClade #%d (%s): %d seqs selected, MRCA is %d\n", iclade+1, key, sizeclade, mrca+1);
      /* count unselected sequences that also descend from the MRCA */
      for(is=0,paraphyl=0; is<com.ns; is++) {
         if(chosen[is] == 0)
            for(j=nodes[is].father; j!=-1; j=nodes[j].father)
               if(j==mrca) { paraphyl++;  break; }
      }
      if(paraphyl)
         printf("\nThis clade is paraphyletic, & includes %d other sequences\n", paraphyl);

      nodes[mrca].label = iclade+1;
      if(debug) OutTreeN(F0, 1, haslength|PrLabel);
   }

   for(i=0; i<com.ns-1; i++)  free(anc[i]);
   OutTreeN(fout, 1, haslength|PrLabel);  FPN(fout);
   printf("Printed final tree with labels in evolver.out\n");
   exit(0);
}