示例#1
0
/*
 * Simulation driver for the likelihood-ratio statistic -2*log(lambda).
 *
 * Reads a count n followed by n (mu, sigma) pairs from C:/cprog/3bin.txt.
 * For each of 40 replicates it draws normal variates with the Box-Muller
 * transform, rescales them to mean 2 and standard deviation sqrt(12),
 * evaluates maxlike() on the 3x3 grid of (mu[j], sig[k]) candidates and
 * writes -2*(mlenum-mleden) to C:/cprog/3bout.txt.  The mean of the 40
 * statistics is finally turned into a z value and compared with +/-1.95996.
 *
 * Fixes over the previous version:
 *   - mlm is initialised before it is accumulated;
 *   - both fopen() calls and the fscanf() results are checked;
 *   - n is validated against the capacity of mu[]/sig[];
 *   - mu[]/sig[] are zero-initialised so the fixed 3x3 grid never reads
 *     indeterminate values when fewer than 3 pairs are supplied;
 *   - the Box-Muller angle uses a correct value of pi (was 3.1428);
 *   - a uniform draw of exactly 0 is rejected so log(0) is never taken.
 */
void main(){
	float x[200],u1,u2,r,mle[10],mlenum,mleden,lm,mlm=0.0,mu[4]={0},sig[4]={0},z;
	double theta;
	int n,i,j,k,c;
	FILE *p, *q;
	clrscr();
	randomize();
	p=fopen("C:/cprog/3bin.txt","r");
	if(p==NULL){
		fprintf(stderr,"Cannot open C:/cprog/3bin.txt\n");
		return;
	}
	q=fopen("C:/cprog/3bout.txt","w");
	if(q==NULL){
		fprintf(stderr,"Cannot open C:/cprog/3bout.txt\n");
		fclose(p);
		return;
	}
	/* n indexes mu[] and sig[], so it must fit their capacity. */
	if(fscanf(p, "%d", &n)!=1 || n<1 || n>(int)(sizeof mu/sizeof mu[0])){
		fprintf(stderr,"Bad parameter count in input file\n");
		fclose(p);
		fclose(q);
		return;
	}
	for(i=0; i<n ; i++){
		if(fscanf(p, "%f%f", &mu[i], &sig[i])!=2){
			fprintf(stderr,"Bad (mu, sigma) pair in input file\n");
			fclose(p);
			fclose(q);
			return;
		}
	}
	fprintf(q, "N\t-2Log(lm(x))\n\n");
	for(i=0;i<40;i++){
		c=0; mlenum=0.0; mleden=0.0; lm=0.0;
		/* Box-Muller: each pair of uniforms yields two standard normals. */
		for(j=0;j<n;j+=2){
			do{
				u1=(float)rand()/(RAND_MAX+1.0);
			}while(u1<=0.0);   /* log(0) is undefined */
			u2=(float)rand()/(RAND_MAX+1.0);
			r=(float)sqrt(-2*log(u1));
			theta=2.0*3.14159265358979323846*u2;
			x[j]=r*cos(theta);
			x[j+1]=r*sin(theta);
			/* Rescale to mean 2, standard deviation sqrt(12). */
			x[j]=2+sqrt(12)*x[j];
			x[j+1]=2+sqrt(12)*x[j+1];
		}
		for(j=0;j<3;j++){
			mle[c]=0.0;
			for(k=0;k<3;k++){
				mle[c]=(float)maxlike(x, mu[j], sig[k], n);
				if(c==0){
					/* First row seeds both numerator and denominator. */
					mlenum=mle[c];
					mleden=mle[c];
				}
				else	mleden=(float)max(mleden,mle[c]);
				/* Numerator is restricted to mu in {1,2} and sigma in {4,9}. */
				if((mu[j]==1 || mu[j]==2) && (sig[k]==4||sig[k]==9) && c>0)
					mlenum=(float)max(mlenum,mle[c]);
			}
			/* NOTE(review): c advances once per mu row, not per (mu, sigma)
			   candidate, so every k of the first row overwrites the seed.
			   Kept as in the original; confirm whether c++ belongs inside
			   the k loop. */
			c++;
		}
		lm=-2*(mlenum-mleden);
		fprintf(q,"%d\t%f\n",i+1,lm);
		mlm+=lm;
	}
	mlm=mlm/40;
	/* NOTE(review): the mean is standardised with sqrt(4), not sqrt(4/40);
	   kept as in the original -- confirm the intended standard error. */
	z=(mlm-2)/sqrt(4);
	if(z>1.95996 || z<-1.95996)
		fprintf(q,"\n\nz = %f\n -2log(lm(x)) does not follow a chi-sq distribution with 2 d.f.",z);
	else	fprintf(q,"\n\nz = %f -2log(lm(x)) follows a chi-sq distribution with 2 d.f.",z);
	fclose(p);
	fclose(q);
}
示例#2
0
/*
 * Reads one answer line from stdin into buf (at most size-1 characters),
 * strips the trailing newline and discards any excess input on the line.
 * Returns 1 on success, 0 on end-of-file or read error.
 */
static int read_answer_line(char *buf, size_t size)
{
  size_t len;
  int ch;

  if(fgets(buf, (int)size, stdin)==NULL) return 0;
  len=strlen(buf);
  if(len>0 && buf[len-1]=='\n')
    buf[len-1]='\0';
  else
    while((ch=getchar())!='\n' && ch!=EOF) ;   /* drop the rest of an over-long line */
  return 1;
}

/*
 * Program entry point: evaluates every requested (data set, tree) pair
 * with maxlike() and writes the resulting trees to treefile.eqgc and
 * treefile.ndgc.
 *
 * Arguments (each one is prompted for interactively when missing):
 *   argv[1]  sequence file (MASE)
 *   argv[2]  tree file; trees are separated by ';', text inside [] is
 *            treated as a comment
 *   argv[3]  option file, parsed by getoptions()
 *
 * Fixes over the previous version:
 *   - gets() replaced by a bounded fgets()-based reader; end of input now
 *     terminates the program instead of looping on a stale buffer;
 *   - argv[1] is length-checked before being copied into nomfinseq;
 *   - getc() result is kept in an int so the EOF comparison is reliable;
 *   - a sequence file yielding no sequence is rejected before the modulo
 *     (previously a division by zero);
 *   - each tree is copied into its own buffer with ';' appended: the old
 *     in-place "*(prov+1)=0" destroyed the first character of the next
 *     tree when trees were adjacent ("...);(...") and could write one
 *     byte past the end of alltrees;
 *   - c_tree has room for a final tree that lacks a terminating ';';
 *   - fopen("detailed_out") is checked, output files are closed.
 */
int main(int argc, char** argv){

/* 		A->0 ; C->1 ; G->2 ; T->3 		*/

  int i, j, ch, depth, nbseq, nbseqvrai, nbtree, nbdataset, maxtreesize, allcouples=0;
  char nomfinseq[100], nomfintree[100], nomfopt[100], *seq[MAXNSP*MAXDATASET], *seqname[MAXNSP*MAXDATASET], *comments[MAXNSP*MAXDATASET], muet, print1;
  char* alltrees, **c_tree, *prov, *ctree1, *ctree2;
  double **maxl;
  FILE *treefile, *optfile, *outfile1, *outfile2, *in;
  FILE* dutheil;
  options opt;

/* printmemory_init(); */

  init_cas();

  srand48(seed);


   /** input sequences **/


  if(argc<2){
    muet=0;
    while(1){
      printf("\nSequence file (MASE) ?  ");
      if(!read_answer_line(nomfinseq, sizeof nomfinseq)){
        printf("\nNo sequence file given\n");
        exit(EXIT_FAILURE);
      }
      in=fopen(nomfinseq, "r");
      if(in) break;
      printf("Cannot find file : %s\n", nomfinseq);
    }
  }
  else{
    in=fopen(argv[1], "r");
    if(!in){
      printf("Cannot find sequence file : %s\n", argv[1]);
      exit(EXIT_FAILURE);
    }
    if(strlen(argv[1])>=sizeof nomfinseq){
      printf("Sequence file name too long : %s\n", argv[1]);
      exit(EXIT_FAILURE);
    }
    strcpy(nomfinseq, argv[1]);
    muet=1;
  }
  fclose(in);   /* the file was opened only to check that it exists */
  if(muet)
    nbseq=readmasemuet(nomfinseq, seq, seqname, comments, MAXDATASET*MAXNSP);
  else
    nbseq=readmaseseqs(nomfinseq, seq, seqname, comments, MAXDATASET*MAXNSP);
  if(nbseq<=0){
    printf("Bad sequence file %s\n", nomfinseq);
    exit(EXIT_FAILURE);
  }

  /* A comment starting with ";;" marks the first sequence of the second
     data set; its index is therefore the number of sequences per set. */
  nbseqvrai=nbseq;
  for(i=1;i<=nbseq/2;i++){
    if(strncmp(comments[i], ";;", 2)==0){
      nbseqvrai=i;
      break;
    }
  }
  if(nbseq%nbseqvrai!=0){
    printf("Bad sequence file %s\n", nomfinseq);
    exit(EXIT_FAILURE);
  }
  nbdataset=nbseq/nbseqvrai;

  for(i=0;i<nbdataset;i++)
    refresh(seq+i*nbseqvrai, nbseqvrai, 0);


   /** input trees **/

  maxtreesize=50*nbseqvrai*MAXNTREE;
  alltrees=(char*)check_alloc(maxtreesize+1, sizeof(char));

  if(argc<3){
    while(1){
      printf("\nTree file ?  ");
      if(!read_answer_line(nomfintree, sizeof nomfintree)){
        printf("\nNo tree file given\n");
        exit(EXIT_FAILURE);
      }
      treefile=fopen(nomfintree, "r");
      if(treefile) break;
      printf("Cannot find file : %s\n", nomfintree);
    }
  }
  else{
    treefile=fopen(argv[2], "r");
    if(!treefile){
      printf("Cannot find tree file : %s\n", argv[2]);
      exit(EXIT_FAILURE);
    }
  }

  /* Load at most maxtreesize characters of the tree file. */
  i=0;
  while(i<maxtreesize && (ch=getc(treefile))!=EOF) alltrees[i++]=(char)ch;
  alltrees[i]='\0';

  fclose(treefile);

  /* Every ';' ends at most one tree; one extra slot covers a last tree
     that is not followed by ';'. */
  i=0;
  prov=alltrees;
  while(*prov) {if(*prov==';') i++; prov++;}
  c_tree=(char**)check_alloc(i+1, sizeof(char*));

  /* Neutralise ';' and '(' inside [] comments so they are not mistaken
     for tree syntax; nested or unbalanced brackets are rejected. */
  depth=0; prov=alltrees;
  while(*prov){
    if(*prov=='[') depth++;
    if(*prov==']') depth--;
    if(depth!=0 && depth!=1){
      printf("Unmatched brackets [] in tree file\n");
      exit(EXIT_FAILURE);
    }
    if(depth==1 && (*prov==';' || *prov=='(')) *prov='.';
    prov++;
  }

  /* Split on ';'. Each tree starts at the first '(' of its segment and is
     stored as a private copy with the terminating ';' restored. Segments
     without '(' are skipped. */
  nbtree=0;
  prov=alltrees;
  while(*prov){
    size_t seglen=strcspn(prov, ";");
    char *start=(char*)memchr(prov, '(', seglen);
    if(start){
      size_t treelen=seglen-(size_t)(start-prov);
      c_tree[nbtree]=(char*)check_alloc((int)(treelen+2), sizeof(char));
      memcpy(c_tree[nbtree], start, treelen);
      c_tree[nbtree][treelen]=';';
      c_tree[nbtree][treelen+1]='\0';
      nbtree++;
    }
    prov+=seglen;
    if(*prov==';') prov++;
  }


   /** input options **/


  if(argc<4){
    while(1){
      printf("\nOption file ?  ");
      if(!read_answer_line(nomfopt, sizeof nomfopt)){
        printf("\nNo option file given\n");
        exit(EXIT_FAILURE);
      }
      optfile=fopen(nomfopt, "r");
      if(optfile) break;
      printf("Cannot find file : %s\n", nomfopt);
    }
  }
  else{
    optfile=fopen(argv[3], "r");
    if(!optfile){
      printf("Cannot find option file : %s\n", argv[3]);
      exit(EXIT_FAILURE);
    }
  }
  /* NOTE(review): optfile is not closed here because it is not visible
     whether getoptions() closes it -- confirm. */
  getoptions(&opt, optfile);
  print1=opt->print->PRINT1;


  allcouples=opt->ALLCOUPLES;

  if(opt->print->EVAL_OUT){
    /* Start the detailed output file with its column header. */
    dutheil=fopen("detailed_out", "w");
    if(dutheil==NULL){
      printf("Cannot write file : detailed_out\n");
      exit(EXIT_FAILURE);
    }
    fprintf(dutheil, "numero\tarbre\tlnL\tTs/Tv\tGCanc\talpha\tcovar\tpi\n");
    fclose(dutheil);
  }


  if(print1)
    printf("\n%d sequence data sets and %d trees found : ", nbdataset, nbtree);
  if(!allcouples){
    /* Pairwise mode: data set i is evaluated on tree i only. */
    nbdataset=mini(nbdataset, nbtree);
    nbtree=nbdataset;
  }
  if(print1)
    printf("%d evaluations processed\n\n", allcouples?nbdataset*nbtree:nbdataset);



  maxl=(double**)check_alloc(nbdataset, sizeof(double*));
  for(i=0;i<nbdataset;i++)
    maxl[i]=(double*)check_alloc(nbtree, sizeof(double));

  ctree1=(char*)check_alloc(50*nbseq, sizeof(char));
  ctree2=(char*)check_alloc(50*nbseq, sizeof(char));
  outfile1=fopen("treefile.eqgc", "w");
  outfile2=fopen("treefile.ndgc", "w");
  if(outfile1==NULL || outfile2==NULL){ printf("Cannot write tree file\n"); exit(EXIT_FAILURE); }

  for(i=0;i<nbdataset;i++){
    for(j=0;j<nbtree;j++){
      if(!allcouples) j=i;
      if(print1) printf("\ndata set %d , tree %d\n", i+1, j+1);
      maxl[i][j]=maxlike(nbseqvrai, seq+i*nbseqvrai, seqname+i*nbseqvrai, c_tree[j], opt, ctree1, ctree2);

      /* Label each tree only when several evaluations share the files. */
      if(nbdataset*nbtree>1){
        fprintf(outfile1, "[data set %d, tree %d]\n%s\n\n", i+1, j+1, ctree1);
        fprintf(outfile2, "[data set %d, tree %d]\n%s\n\n", i+1, j+1, ctree2);
      }
      else{
        fprintf(outfile1, "%s\n", ctree1);
        fprintf(outfile2, "%s\n", ctree2);
      }
      if(!allcouples) break;
    }
  }

  fclose(outfile1);
  fclose(outfile2);

  if(print1){
    if(nbdataset*nbtree==1){
      printf("Tree is written into files : treefile.eqgc (equilibrium G+C content)\n");
      printf("                             treefile.ndgc (G+C content at each node)\n\n");
    }
    else{
      printf("Trees are written into files : treefile.eqgc (equilibrium G+C content)\n");
      printf("                               treefile.ndgc (G+C content at each node)\n\n");
    }
  }

  return 0;
}
示例#3
0
/*
 * Tree search by clade rearrangement ("shake").
 *
 * Starting from the rooted, bifurcating tree given in ctree, repeatedly
 * moves clades on either side of the root across movedist internal
 * branches, evaluates each new topology with maxlike(), and restarts from
 * any topology whose likelihood beats the current best.  movedist grows
 * by one after each pass without improvement, up to opt->SH_G (or nb-3
 * when SH_G is not in 1..nb-4).  The best tree is finally re-evaluated
 * with the caller's print settings and written to treefile.eqgc and
 * treefile.ndgc.
 *
 *   nb             number of sequences; must match the tree's species count
 *   seq, seqname   sequences and their names, handed to maxlike();
 *                  seqname is matched against the names found in the tree
 *   ctree          starting tree string, parsed by ctot()
 *   opt            options; opt->print is replaced by a reduced print
 *                  setting during the search and restored before the
 *                  final evaluation
 *   eval_input     optional array of nb_eval_input tree strings that are
 *                  registered as already evaluated (may be NULL)
 *
 * Returns the likelihood reported by the final maxlike() call.
 * Exits the program on malformed input or when memory is exhausted.
 *
 * Fixes over the previous version:
 *   - nblist (long) is printed with %ld instead of %d;
 *   - every row of list_tree is checked, and a failed attempt is released
 *     before retrying with a smaller list (previously only the last row
 *     was tested and failed attempts leaked);
 *   - the two output files are closed;
 *   - buffers owned solely by this function (noprint, solid, tree strings,
 *     evaltree, list_tree) are released;
 *   - unused locals removed.
 *
 * NOTE(review): the remaining work arrays (ttree rows re-allocated per
 * evaluated tree, clade tables, nom2, newtree, ...) are still not freed;
 * their ownership after the helper calls is not visible from here.
 */
double shake(int nb, char** seq, char** seqname, char* ctree, options opt, char** eval_input, int nb_eval_input){

  int nb2, i, j, k, *ttree[MAXNSP], **curtree, **newtree, **evaltree, nbbi, nbdclade, nbgclade, print1, print2;
  int nochange, l1, l2, restart_d, restart_g, oldmovedist;
  int **dclade, **gclade, **newdclade, **newgclade, **ddist, **gdist, movedist, maxmovedist;
  int **list_tree, lliste, *solid;
  long nblist, nblistmax;
  double *lgbp, *sortedlgbp, *lgbi, *bootvals, fracroot1, lkh, maxlkh;
  char *nom[MAXNSP], *nom2[MAXNSP], **dcladename, **gcladename, **list1, **list2, racine, *ctreenew, *ctreenew_nobl, *treedeb, *ctree1, *ctree2;
  FILE* outfile1, *outfile2;
  print_option trueprint, noprint;


  /* Install a reduced print setting for the search; the caller's setting
     is kept in trueprint and restored before the final evaluation. */
  print1=opt->print->PRINT1;
  print2=opt->print->PRINT2;
  noprint=check_alloc(1, sizeof(struct print_option));
  noprint->PRINT3=0;
  if(opt->print->PRINT2)
    noprint->PRINT1=noprint->PRINT2=1;
  else
    noprint->PRINT1=noprint->PRINT2=0;
  if(print1) noprint->PRINT0=1; else noprint->PRINT0=0;
  trueprint=opt->print;
  opt->print=noprint;



		/* READ TREE STRING */

  lgbp=(double*)check_alloc(nb+1, sizeof(double));
  sortedlgbp=(double*)check_alloc(nb+1, sizeof(double));
  lgbi=(double*)check_alloc(nb+1, sizeof(double));
  for(i=0;i<nb+1;i++)
    lgbp[i]=lgbi[i]=-1.;

  solid=(int*)check_alloc(nb+1, sizeof(int));
  bootvals=(double*)check_alloc(nb+1, sizeof(double));
  /* nom, nom2 and ttree hold MAXNSP entries and are indexed up to nb. */
  if(nb>=MAXNSP) {printf("Too many sequences\n"); exit(EXIT_FAILURE);}
  for(i=0;i<=nb;i++){
    nom[i]=(char*)check_alloc(MAXLNAME+1, sizeof(char));
    nom2[i]=(char*)check_alloc(MAXLNAME+1, sizeof(char));
    ttree[i]=(int*)check_alloc(nb, sizeof(int));
  }
  list1=check_alloc(nb, sizeof(char*));
  list2=check_alloc(nb, sizeof(char*));
  curtree=(int**)check_alloc(nb+1, sizeof(int*));
  newtree=(int**)check_alloc(nb+1, sizeof(int*));
  evaltree=(int**)check_alloc(nb+1, sizeof(int*));
  for(i=0;i<nb+1;i++) newtree[i]=(int*)check_alloc(nb-2, sizeof(int));
  ctreenew=(char*)check_alloc(2*MAXLNAME*nb, sizeof(char));
  ctree1=(char*)check_alloc(2*MAXLNAME*nb, sizeof(char));
  ctree2=(char*)check_alloc(2*MAXLNAME*nb, sizeof(char));
  ctreenew_nobl=(char*)check_alloc(2*MAXLNAME*nb, sizeof(char));


  nb2=ctot(ctree, ttree, lgbi, lgbp, bootvals, nom, &racine, &nbbi);

  if(nb2<nb){
    printf("More species in sequence file than in tree file\n");
    exit(EXIT_FAILURE);
  }
  if(nb2>nb){
    printf("More species in tree file than in sequence file\n");
    exit(EXIT_FAILURE);
  }


	/* PREPARE TREE : UNROOT, SET LEFT and RIGHT LISTS, SORT TAXA */

  if(racine=='r'){
    unroot(ttree, nb, lgbi, lgbp, bootvals, nom, list1, list2, &l1, &l2, &fracroot1);
  }
  else{
    printf("Tree must be rooted\n");
    exit(EXIT_FAILURE);
  }
  nbbi--;
  if(nbbi!=nb-3){
    printf("Tree must be bifurcating\n");
    exit(EXIT_FAILURE);
  }

  /* Reorder tree rows and terminal branch lengths to follow seqname. */
  for(i=0;i<nb;i++){
    for(j=0;j<nb;j++){
      if(samename(seqname[i], nom[j])){
        curtree[i]=ttree[j];
        sortedlgbp[i]=lgbp[j];
        break;
      }
    }
  }

  /* Branches supported above the threshold are flagged as "solid"; the
     search below skips rearrangements that cross them. */
  for(i=0;i<nb-3;i++) if(bootvals[i]>opt->SH_MAXBOOTCROSSED) solid[i]=1;

  ctree_noblbs(ctree, ctreenew_nobl, strlen(ctree));

	/* EVALUATE INITIAL TREE */

  if(print1)
    printf("\nEvaluating initial tree : \n%s\n", ctreenew_nobl);
  if(print2)
    printf("\n");

  maxlkh=maxlike(nb, seq, seqname, curtree, lgbi, sortedlgbp, nbbi, l1, list1, l2, list2, opt, NULL, NULL, NULL);
  if(opt->SH_RESTART) save_current_best("current_best_tree", ctreenew_nobl, maxlkh, 1);
  if(opt->SH_RESTART) save_evaluated("evaluated_trees", ctreenew_nobl, maxlkh, 1);

	/* ALLOCATE SHAKE VARIABLES */

  nbdclade=2*l1-1;
  nbgclade=2*l2-1;
  dclade=(int**)check_alloc(nbdclade, sizeof(int*));
  for(i=0;i<nbdclade;i++)
    dclade[i]=(int*)check_alloc(nb, sizeof(int));
  gclade=(int**)check_alloc(2*nb, sizeof(int*));
  for(i=0;i<nbgclade;i++)
    gclade[i]=(int*)check_alloc(nb, sizeof(int));
  newdclade=(int**)check_alloc(nbdclade, sizeof(int*));
  for(i=0;i<nbdclade;i++)
    newdclade[i]=(int*)check_alloc(nb, sizeof(int));
  newgclade=(int**)check_alloc(nbgclade, sizeof(int*));
  for(i=0;i<nbgclade;i++)
    newgclade[i]=(int*)check_alloc(nb, sizeof(int));
  ddist=(int**)check_alloc(nbdclade, sizeof(int*));
  gdist=(int**)check_alloc(nbgclade, sizeof(int*));
  for(i=0;i<nbdclade;i++) ddist[i]=(int*)check_alloc(nbdclade, sizeof(int));
  for(i=0;i<nbgclade;i++) gdist[i]=(int*)check_alloc(nbgclade, sizeof(int));
  dcladename=(char**)check_alloc(nbdclade, sizeof(char*));
  gcladename=(char**)check_alloc(nbgclade, sizeof(char*));
  for(i=0;i<nbdclade;i++)
    dcladename[i]=(char*)check_alloc(nb*(MAXLNAME+3)+1, sizeof(char));
  for(i=0;i<nbgclade;i++)
    gcladename[i]=(char*)check_alloc(nb*(MAXLNAME+3)+1, sizeof(char));

  /* Capacity of the list of evaluated topologies: nb*nb, raised to
     MIN_NBLISTMAX and nb_eval_input, capped at MAX_NBLISTMAX, then halved
     until the allocation succeeds. */
  nblistmax=nb*nb;
  if(nblistmax<MIN_NBLISTMAX) nblistmax=MIN_NBLISTMAX;
  if(nblistmax<nb_eval_input) nblistmax=nb_eval_input;
  if(nblistmax>MAX_NBLISTMAX) nblistmax=MAX_NBLISTMAX;
  while(1){
    lliste=(nblistmax*(nb-3)+lmot-1)/lmot;
    list_tree=(int**)check_alloc(nb, sizeof(int*));
    for(i=0;i<nb;i++){
      list_tree[i]=(int*)calloc(lliste, sizeof(int));
      if(list_tree[i]==NULL) break;
    }
    if(i==nb) break;
    /* Allocation failed at row i: release this attempt before retrying. */
    while(i>0) free(list_tree[--i]);
    free(list_tree);
    nblistmax/=2;
    if(nblistmax==0){
      printf("Not enough memory\n");
      exit(EXIT_FAILURE);
    }
  }


	/* SET LIST OF EVALUATED TREES */

  if(eval_input){
    nblist=0;
    for(k=0;k<nb_eval_input;k++){
      /* Fresh rows for each tree: curtree still points into the rows
         allocated for the starting tree. */
      for(i=0;i<=nb;i++) ttree[i]=check_alloc(nb, sizeof(int));
      nb2=ctot(eval_input[k], ttree, NULL, NULL, NULL, nom2, &racine, NULL);
      if(racine=='r')
        unroot(ttree, nb, NULL, NULL, NULL, nom2, NULL, NULL, NULL, NULL, NULL);
      else{ printf("Evaluated trees must be rooted\n"); exit(EXIT_FAILURE);}
      for(i=0;i<nb;i++){
        for(j=0;j<nb;j++){
          if(samename(seqname[i], nom2[j])){
            evaltree[i]=ttree[j];
            break;
          }
        }
      }
      if(deja_evalue(evaltree, nb, list_tree, nblist, nblistmax)) continue;
      addtolist(evaltree, nb, list_tree, nblist, nblistmax);
      nblist++;
    }
    printf("%ld already evaluated topologies loaded\n", nblist);
  }
  else{
    addtolist(curtree, nb, list_tree, 0, nblistmax);
    nblist=1;
  }


	/* SHAKE */

  treedeb=ctreenew;   /* start of the ctreenew buffer, used to rewind it */
  nochange=1; movedist=0;
  if(opt->SH_G>0 && opt->SH_G<nb-3)
    maxmovedist=opt->SH_G;
  else
    maxmovedist=nb-3;

  if(print1)
    printf("\nStarting rearrangements\n");

  do{

    oldmovedist=movedist;

    /* Widen the moves after a pass without improvement, otherwise go back
       to single-branch moves. */
    if(nochange==1) movedist++;
    else movedist=1;

    if(print1 && !(nochange==0 && oldmovedist==1)){
      printf("\nCrossing %d internal branch", movedist);
      if(movedist>1) printf("es");
      printf("\n");
    }

    nochange=1;


    /* Rearrangements among the first set of clades (dclade). */
    while(1){
      setclades(curtree, nb, seqname, l1, list1, l2, list2, dclade, gclade, ddist, gdist, nbdclade, nbgclade, dcladename, gcladename);
      restart_d=0;
      for(i=0;i<nbdclade;i++){
        for(j=0;j<nbdclade;j++){
          if(abs(ddist[i][j])!=movedist) continue;
          if(ddist[i][j]<0) continue;
          moveclade(dclade, newdclade, nbdclade, nb, ddist, i, j);
          settree(newdclade, nbdclade, gclade, nbgclade, nb, newtree);

          if(deja_evalue(newtree, nb, list_tree, nblist, nblistmax))
            continue;
          if(solid_branch_crossed(newtree, curtree, nb, solid))
            continue;

          addtolist(newtree, nb, list_tree, nblist, nblistmax);
          nblist++;
          if(print1){
            printf("\nMoving %s toward %s\n", dcladename[i], dcladename[j]);
          }
          if(print2)
            printf("\n");
          lkh=maxlike(nb, seq, seqname, newtree, NULL, NULL, nbbi, l1, list1, l2, list2, opt, ctreenew, NULL, NULL);

          if(lkh>maxlkh){
            ctree_noblbs(ctreenew, ctreenew_nobl, strlen(ctreenew));
            if(print1)
              printf("New tree is optimal : \n%s\n\nRestarting rearrangements\n", ctreenew_nobl);
            maxlkh=lkh;
            copytree(newtree, curtree, nb);
            if(opt->SH_RESTART) save_current_best("current_best_tree", ctreenew_nobl, lkh, 0);
            if(opt->SH_RESTART) save_evaluated("evaluated_trees", ctreenew_nobl, lkh, 0);
            restart_d=1;
            nochange=0;
            /* Clear the tree string buffer for the next evaluation. */
            ctreenew=treedeb;
            while(*ctreenew) {*ctreenew=0; ctreenew++; }
            ctreenew=treedeb;
            break;
          }
          else{
            if(print1)
              printf("No improvement:\n");
            ctree_noblbs(ctreenew, ctreenew_nobl, strlen(ctreenew));
            if(print1)
              printf("%s\n", ctreenew_nobl);
            if(opt->SH_RESTART) save_evaluated("evaluated_trees", ctreenew_nobl, lkh, 0);
          }
          /* Clear the tree string buffer for the next evaluation. */
          ctreenew=treedeb;
          while(*ctreenew) {*ctreenew=0; ctreenew++; }
          ctreenew=treedeb;
        }
        if(restart_d) break;
      }
      /* After an improvement at distance 1, rescan this side at once;
         otherwise leave the loop. */
      if(!restart_d || movedist>1) break;
    }

    /* An improvement found with movedist>1 restarts the whole search at
       distance 1 (nochange is 0 at this point). */
    if(restart_d && movedist>1) continue;

    /* Same procedure for the second set of clades (gclade). */
    while(1){
      setclades(curtree, nb, seqname, l1, list1, l2, list2, dclade, gclade, ddist, gdist, nbdclade, nbgclade, dcladename, gcladename);
      restart_g=0;
      for(i=0;i<nbgclade;i++){
        for(j=0;j<nbgclade;j++){
          if(abs(gdist[i][j])!=movedist) continue;
          if(gdist[i][j]<0) continue;

          moveclade(gclade, newgclade, nbgclade, nb, gdist, i, j);
          settree(dclade, nbdclade, newgclade, nbgclade, nb, newtree);

          if(deja_evalue(newtree, nb, list_tree, nblist, nblistmax))
            continue;
          if(solid_branch_crossed(newtree, curtree, nb, solid))
            continue;
          addtolist(newtree, nb, list_tree, nblist, nblistmax);
          nblist++;
          if(print1){
            printf("\nMoving %s toward %s\n", gcladename[i], gcladename[j]);
          }
          if(print2){
            printf("\n");
          }

          lkh=maxlike(nb, seq, seqname, newtree, NULL, NULL, nbbi, l1, list1, l2, list2, opt, ctreenew, NULL, NULL);

          if(lkh>maxlkh){
            ctree_noblbs(ctreenew, ctreenew_nobl, strlen(ctreenew));
            if(print1)
              printf("New tree is optimal : \n%s\n\nRestarting rearrangements\n", ctreenew_nobl);
            maxlkh=lkh;
            copytree(newtree, curtree, nb);
            if(opt->SH_RESTART) save_current_best("current_best_tree", ctreenew_nobl, lkh, 0);
            if(opt->SH_RESTART) save_evaluated("evaluated_trees", ctreenew_nobl, lkh, 0);
            restart_g=1;
            nochange=0;
            /* Clear the tree string buffer for the next evaluation. */
            ctreenew=treedeb;
            while(*ctreenew) {*ctreenew=0; ctreenew++; }
            ctreenew=treedeb;
            break;
          }
          else{
            if(print1)
              printf("No improvement:\n");
            ctree_noblbs(ctreenew, ctreenew_nobl, strlen(ctreenew));
            if(print1)
              printf("%s\n", ctreenew_nobl);
            if(opt->SH_RESTART) save_evaluated("evaluated_trees", ctreenew_nobl, lkh, 0);
          }
          /* Clear the tree string buffer for the next evaluation. */
          ctreenew=treedeb;
          while(*ctreenew) {*ctreenew=0; ctreenew++; }
          ctreenew=treedeb;
        }
        if(restart_g) break;
      }
      if(!restart_g || movedist>1) break;
    }
  } while(nochange==0 || movedist!=maxmovedist);



  	/* FINAL EVALUATION */

  if(print1)
    printf("\nFinal evaluation\n");
  if(print2)
    printf("\n");
  opt->print=trueprint;   /* restore the caller's print settings */

  maxlkh=maxlike(nb, seq, seqname, curtree, NULL, NULL, nbbi, l1, list1, l2, list2, opt, NULL, ctree1, ctree2);

  if(ctree1){
    outfile1=fopen("treefile.eqgc", "w");
    outfile2=fopen("treefile.ndgc", "w");
    if(outfile1==NULL || outfile2==NULL){ printf("Cannot write tree file\n"); exit(EXIT_FAILURE); }
    fprintf(outfile1, "%s\n", ctree1);
    fprintf(outfile2, "%s\n", ctree2);
    fclose(outfile1);
    fclose(outfile2);
    if(print1){
      printf("Tree is written into files : treefile.eqgc (equilibrium G+C content)\n");
      printf("                             treefile.ndgc (G+C content at each node)\n\n");
    }
  }


  free(lgbp); free(sortedlgbp); free(lgbi);
  free(bootvals);
  for(i=0;i<nb;i++){ free(nom[i]); free(ttree[i]); }
  free(list1); free(list2); free(curtree);

  /* Buffers referenced only by this function. */
  for(i=0;i<nb;i++) free(list_tree[i]);
  free(list_tree);
  free(evaltree);
  free(solid);
  free(treedeb);          /* ctreenew buffer */
  free(ctreenew_nobl);
  free(ctree1); free(ctree2);
  free(noprint);          /* opt->print no longer points to it */


  return maxlkh;
}