/*
 * Simulation of the likelihood-ratio statistic -2*log(lambda(x)).
 *
 * Input  : C:/cprog/3bin.txt -- an integer n followed by n pairs "mu sig".
 *          n is also used as the size of each simulated sample.
 * Output : C:/cprog/3bout.txt -- one line per replicate (40 replicates) with
 *          the replicate number and -2*log(lambda), then a z value computed
 *          from the mean of the 40 statistics and a verdict line.
 *
 * For each replicate a sample is drawn with the Box-Muller transform and
 * rescaled as 2 + sqrt(12)*N(0,1).  maxlike() (defined elsewhere) is then
 * evaluated for the 3 x 3 combinations of the first three mu and sig values:
 *   - mleden is the maximum over all nine combinations;
 *   - mlenum is the maximum over the combinations with mu in {1,2} and
 *     sig in {4,9}, seeded with the very first combination.
 *
 * Fixes relative to the previous version:
 *   - mlm was accumulated without ever being initialised;
 *   - file opening and parsing errors are now reported instead of ignored;
 *   - n is validated: mu[]/sig[] hold 4 entries and the search loops read
 *     entries 0..2, so only n == 3 or n == 4 is memory-safe;
 *   - u1 == 0 (which gives log(0)) is redrawn;
 *   - 2*pi replaces 2*3.1428;
 *   - the combination counter c advances once per (mu, sig) pair, so that
 *     only the first pair seeds the running maxima (previously the first
 *     three pairs each overwrote them).
 *
 * NOTE(review): z divides by sqrt(4) although mlm is a mean of 40 values;
 * the standard error of that mean would be sqrt(4/40.0).  Left unchanged --
 * confirm the intended test before touching it.
 */
void main(){
    float x[200], u1, u2, mle[10], mlenum, mleden, lm, mlm, mu[4], sig[4], z;
    double two_pi;
    int n, i, j, k, c;
    FILE *p, *q;

    two_pi = 2.0*3.14159265358979323846;

    clrscr();
    randomize();

    p = fopen("C:/cprog/3bin.txt", "r");
    if (p == NULL) {
        printf("Cannot open input file C:/cprog/3bin.txt\n");
        exit(EXIT_FAILURE);
    }
    q = fopen("C:/cprog/3bout.txt", "w");
    if (q == NULL) {
        printf("Cannot open output file C:/cprog/3bout.txt\n");
        fclose(p);
        exit(EXIT_FAILURE);
    }

    /* mu[] and sig[] have 4 slots and slots 0..2 are always read below. */
    if (fscanf(p, "%d", &n) != 1 || n < 3 || n > 4) {
        printf("Bad parameter count in input file (expected 3 or 4)\n");
        fclose(p);
        fclose(q);
        exit(EXIT_FAILURE);
    }
    for (i = 0; i < n; i++) {
        if (fscanf(p, "%f%f", &mu[i], &sig[i]) != 2) {
            printf("Bad parameter line in input file\n");
            fclose(p);
            fclose(q);
            exit(EXIT_FAILURE);
        }
    }

    fprintf(q, "N\t-2Log(lm(x))\n\n");

    mlm = 0.0;
    for (i = 0; i < 40; i++) {
        c = 0;
        mlenum = 0.0;
        mleden = 0.0;
        lm = 0.0;

        /* Box-Muller: two normal deviates per pair of uniforms.
           Writes x[j] and x[j+1]; with n <= 4 this stays inside x[200]. */
        for (j = 0; j < n; j += 2) {
            do {
                u1 = (float)rand()/(RAND_MAX+1.0);
            } while (u1 <= 0.0);          /* log(0) is not finite */
            u2 = (float)rand()/(RAND_MAX+1.0);
            x[j]   = (float)sqrt(-2*log(u1))*cos(two_pi*u2);
            x[j+1] = (float)sqrt(-2*log(u1))*sin(two_pi*u2);
            x[j]   = 2+sqrt(12)*x[j];
            x[j+1] = 2+sqrt(12)*x[j+1];
        }

        /* Scan the 3 x 3 grid of (mu, sig) candidates. */
        for (j = 0; j < 3; j++) {
            for (k = 0; k < 3; k++) {
                mle[c] = (float)maxlike(x, mu[j], sig[k], n);
                if (c == 0) {
                    /* first candidate seeds both running maxima */
                    mlenum = mle[c];
                    mleden = mle[c];
                }
                else
                    mleden = (float)max(mleden, mle[c]);
                if ((mu[j]==1 || mu[j]==2) && (sig[k]==4 || sig[k]==9) && c>0)
                    mlenum = (float)max(mlenum, mle[c]);
                c++;
            }
        }

        lm = -2*(mlenum-mleden);
        fprintf(q, "%d\t%f\n", i+1, lm);
        mlm += lm;
    }

    mlm = mlm/40;
    z = (mlm-2)/sqrt(4);
    if (z>1.95996 || z<-1.95996)
        fprintf(q, "\n\nz = %f\n -2log(lm(x)) does not follow a chi-sq distribution with 2 d.f.", z);
    else
        fprintf(q, "\n\nz = %f -2log(lm(x)) follows a chi-sq distribution with 2 d.f.", z);

    fclose(p);
    fclose(q);
}
int main(int argc, char** argv){ /* A->0 ; C->1 ; G->2 ; T->3 */ int i, j, nbseq, nbseqvrai, nbtree, nbdataset, maxtreesize, allcouples=0; char nomfinseq[100], nomfintree[100], nomfopt[100], *seq[MAXNSP*MAXDATASET], *seqname[MAXNSP*MAXDATASET], *comments[MAXNSP*MAXDATASET], muet, print1; char* alltrees, **c_tree, *prov, *prov2, *ctree1, *ctree2; double **maxl; FILE *treefile, *optfile, *outfile1, *outfile2, *in; FILE* dutheil; options opt; /* printmemory_init(); */ init_cas(); srand48(seed); /** input sequences **/ if(argc<2){ muet=0; while(1){ printf("\nSequence file (MASE) ? "); gets(nomfinseq); in=fopen(nomfinseq, "r"); if(in) break; printf("Cannot find file : %s\n", nomfinseq); } } else{ in=fopen(argv[1], "r"); if(!in){ printf("Cannot find sequence file : %s\n", argv[1]); exit(EXIT_FAILURE); } sprintf(nomfinseq, "%s", argv[1]); muet=1; } fclose(in); if(muet) nbseq=readmasemuet(nomfinseq, seq, seqname, comments, MAXDATASET*MAXNSP); else nbseq=readmaseseqs(nomfinseq, seq, seqname, comments, MAXDATASET*MAXNSP); nbseqvrai=nbseq; for(i=1;i<=nbseq/2;i++){ if(strncmp(comments[i], ";;", 2)==0){ nbseqvrai=i; break; } } if(nbseq%nbseqvrai!=0){ printf("Bad sequence file %s\n", nomfinseq); exit(EXIT_FAILURE); } nbdataset=nbseq/nbseqvrai; for(i=0;i<nbdataset;i++) refresh(seq+i*nbseqvrai, nbseqvrai, 0); /** input trees **/ maxtreesize=50*nbseqvrai*MAXNTREE; alltrees=(char*)check_alloc(maxtreesize+1, sizeof(char)); c_tree=(char**)check_alloc(MAXNTREE, sizeof(char*)); if(argc<3){ while(1){ printf("\nTree file ? 
"); gets(nomfintree); treefile=fopen(nomfintree, "r"); if(treefile) break; printf("Cannot find file : %s\n", nomfintree); } } else{ treefile=fopen(argv[2], "r"); if(!treefile){ printf("Cannot find tree file : %s\n", argv[2]); exit(EXIT_FAILURE); } } i=0; while(i<maxtreesize && (alltrees[i]=getc(treefile))!=EOF) i++; alltrees[i]='\0'; fclose(treefile); i=0; prov=alltrees; while(*prov) {if(*prov==';') i++; prov++;} free(c_tree); c_tree=(char**)check_alloc(i, sizeof(char*)); i=0; prov=alltrees; while(*prov){ if(*prov=='[') i++; if(*prov==']') i--; if(i!=0 && i!=1){ printf("Unmatched brackets [] in tree file\n"); exit(EXIT_FAILURE); } if(i==1 && (*prov==';' || *prov=='(')) *prov='.'; prov++; } nbtree=0; prov=alltrees; while(1){ prov2=strtok(prov, ";"); if(prov) prov=NULL; if(!prov2) break; while(*prov2 && *prov2!='(') prov2++; if(*prov2==0) continue; c_tree[nbtree]=prov2; nbtree++; } for(i=0;i<nbtree;i++){ prov=c_tree[i]; while(*prov) prov++; *prov=';'; *(prov+1)=0; } /** input options **/ if(argc<4){ while(1){ printf("\nOption file ? 
"); gets(nomfopt); optfile=fopen(nomfopt, "r"); if(optfile) break; printf("Cannot find file : %s\n", nomfopt); } } else{ optfile=fopen(argv[3], "r"); if(!optfile){ printf("Cannot find option file : %s\n", argv[3]); exit(EXIT_FAILURE); } } getoptions(&opt, optfile); print1=opt->print->PRINT1; allcouples=opt->ALLCOUPLES; if(opt->print->EVAL_OUT){ dutheil=fopen("detailed_out", "w"); fprintf(dutheil, "numero\tarbre\tlnL\tTs/Tv\tGCanc\talpha\tcovar\tpi\n"); fclose(dutheil); } if(print1) printf("\n%d sequence data sets and %d trees found : ", nbdataset, nbtree); if(!allcouples){ nbdataset=mini(nbdataset, nbtree); nbtree=nbdataset; } if(print1) printf("%d evaluations processed\n\n", allcouples?nbdataset*nbtree:nbdataset); maxl=(double**)check_alloc(nbdataset, sizeof(double*)); for(i=0;i<nbdataset;i++) maxl[i]=(double*)check_alloc(nbtree, sizeof(double)); ctree1=(char*)check_alloc(50*nbseq, sizeof(char)); ctree2=(char*)check_alloc(50*nbseq, sizeof(char)); outfile1=fopen("treefile.eqgc", "w"); outfile2=fopen("treefile.ndgc", "w"); if(outfile1==NULL || outfile2==NULL){ printf("Cannot write tree file\n"); exit(EXIT_FAILURE); } for(i=0;i<nbdataset;i++){ for(j=0;j<nbtree;j++){ if(!allcouples) j=i; if(print1) printf("\ndata set %d , tree %d\n", i+1, j+1); maxl[i][j]=maxlike(nbseqvrai, seq+i*nbseqvrai, seqname+i*nbseqvrai, c_tree[j], opt, ctree1, ctree2); if(nbdataset*nbtree>1){ fprintf(outfile1, "[data set %d, tree %d]\n%s\n\n", i+1, j+1, ctree1); fprintf(outfile2, "[data set %d, tree %d]\n%s\n\n", i+1, j+1, ctree2); } else{ fprintf(outfile1, "%s\n", ctree1); fprintf(outfile2, "%s\n", ctree2); } if(!allcouples) break; } } if(print1){ if(nbdataset*nbtree==1){ printf("Tree is written into files : treefile.eqgc (equilibrium G+C content)\n"); printf(" treefile.ndgc (G+C content at each node)\n\n"); } else{ printf("Trees are written into files : treefile.eqgc (equilibrium G+C content)\n"); printf(" treefile.ndgc (G+C content at each node)\n\n"); } } return 0; }
/*
 * Topology search by clade rearrangement ("shake").
 *
 * Starting from the rooted, bifurcating tree `ctree`, clades on each side of
 * the root are moved across `movedist` internal branches (setclades /
 * moveclade / settree), every topology not yet in the evaluated list is
 * scored with maxlike(), and any improvement becomes the current tree and
 * restarts the search at distance 1.  The search ends when a full pass at
 * the maximum distance brings no improvement.
 *
 * nb             number of sequences; must be < MAXNSP and equal to the
 *                number of species found in `ctree`
 * seq, seqname   sequences and their names, passed on to maxlike(); names
 *                are matched against tree names with samename()
 * ctree          starting tree string, parsed by ctot()
 * opt            options; opt->print is replaced by a reduced print_option
 *                during the search and restored before the final evaluation.
 *                Fields read here: print, SH_MAXLCROSSED, SH_MAXBOOTCROSSED,
 *                SH_RESTART, SH_G
 * eval_input     optional (may be NULL) array of nb_eval_input tree strings
 *                whose topologies are preloaded as "already evaluated"
 * nb_eval_input  number of entries in eval_input
 *
 * Returns the value of maxlike() for the final tree.  Writes the final trees
 * to treefile.eqgc and treefile.ndgc; when opt->SH_RESTART is set, also
 * calls save_current_best()/save_evaluated() along the way.  Calls
 * exit(EXIT_FAILURE) on invalid input or when memory is exhausted.
 *
 * Fixes relative to the previous version:
 *   - nblist is a long and is now printed with %ld;
 *   - the evaluated-list allocation checks every row and releases a failed
 *     attempt before retrying with a smaller list;
 *   - the two output files are closed;
 *   - the outer loop ends when movedist reaches maxmovedist even if
 *     maxmovedist < 1 (previously `!=` never became false in that case).
 *
 * NOTE(review): most work buffers allocated here are not released on
 * return; only the frees already present are kept, because it is not
 * visible from this function whether ctot()/unroot() retain or reorder the
 * row pointers.
 */
double shake(int nb, char** seq, char** seqname, char* ctree, options opt, char** eval_input, int nb_eval_input){

    int nb2, i, j, ii, jj, k, l, *ttree[MAXNSP], **curtree, **newtree, **evaltree, nbbi, nbdclade, nbgclade, print1, print2;
    int nochange, pres_grossiere=-1, l1, l2, restart_d, restart_g, oldmovedist;
    int **dclade, **gclade, **newdclade, **newgclade, **ddist, **gdist, movedist, maxmovedist;
    int **list_tree, lliste, *solid;
    long nblist, nblistmax;
    double *lgbp, *sortedlgbp, *lgbi, *bootvals, fracroot1, lkh, maxlkh, maxlcrossedbranch;
    char *nom[MAXNSP], *nom2[MAXNSP], **dcladename, **gcladename, **list1, **list2, racine, *ctreenew, *ctreenew_nobl, *treedeb, *ctree1, *ctree2;
    FILE* outfile1, *outfile2;
    print_option trueprint, noprint;

    print1=opt->print->PRINT1;
    print2=opt->print->PRINT2;

    /* reduced verbosity used for the intermediate maxlike() calls */
    noprint=check_alloc(1, sizeof(struct print_option));
    noprint->PRINT3=0;
    if(opt->print->PRINT2) noprint->PRINT1=noprint->PRINT2=1;
    else noprint->PRINT1=noprint->PRINT2=0;
    if(print1) noprint->PRINT0=1;
    else noprint->PRINT0=0;
    trueprint=opt->print;
    opt->print=noprint;

    maxlcrossedbranch=opt->SH_MAXLCROSSED;

    /* READ TREE STRING */

    lgbp=(double*)check_alloc(nb+1, sizeof(double));
    sortedlgbp=(double*)check_alloc(nb+1, sizeof(double));
    lgbi=(double*)check_alloc(nb+1, sizeof(double));
    for(i=0;i<nb+1;i++) lgbp[i]=lgbi[i]=-1.;
    solid=(int*)check_alloc(nb+1, sizeof(int));
    bootvals=(double*)check_alloc(nb+1, sizeof(double));

    /* nom[], nom2[] and ttree[] are MAXNSP-sized and indexed up to nb */
    if(nb>=MAXNSP) {printf("Too many sequences\n"); exit(EXIT_FAILURE);}
    for(i=0;i<=nb;i++){
        nom[i]=(char*)check_alloc(MAXLNAME+1, sizeof(char));
        nom2[i]=(char*)check_alloc(MAXLNAME+1, sizeof(char));
        ttree[i]=(int*)check_alloc(nb, sizeof(int));
    }
    list1=check_alloc(nb, sizeof(char*));
    list2=check_alloc(nb, sizeof(char*));
    curtree=(int**)check_alloc(nb+1, sizeof(int*));
    newtree=(int**)check_alloc(nb+1, sizeof(int*));
    evaltree=(int**)check_alloc(nb+1, sizeof(int*));
    for(i=0;i<nb+1;i++)
        newtree[i]=(int*)check_alloc(nb-2, sizeof(int));

    ctreenew=(char*)check_alloc(2*MAXLNAME*nb, sizeof(char));
    ctree1=(char*)check_alloc(2*MAXLNAME*nb, sizeof(char));
    ctree2=(char*)check_alloc(2*MAXLNAME*nb, sizeof(char));
    ctreenew_nobl=(char*)check_alloc(2*MAXLNAME*nb, sizeof(char));

    nb2=ctot(ctree, ttree, lgbi, lgbp, bootvals, nom, &racine, &nbbi);

    if(nb2<nb){
        printf("More species in sequence file than in tree file\n");
        exit(EXIT_FAILURE);
    }
    if(nb2>nb){
        printf("More species in tree file than in sequence file\n");
        exit(EXIT_FAILURE);
    }

    /* PREPARE TREE : UNROOT, SET LEFT and RIGHT LISTS, SORT TAXA */

    if(racine=='r'){
        unroot(ttree, nb, lgbi, lgbp, bootvals, nom, list1, list2, &l1, &l2, &fracroot1);
    }
    else{
        printf("Tree must be rooted\n");
        exit(EXIT_FAILURE);
    }

    nbbi--;
    if(nbbi!=nb-3){
        printf("Tree must be bifurcating\n");
        exit(EXIT_FAILURE);
    }

    /* reorder tree rows and terminal branch lengths in sequence-file order */
    for(i=0;i<nb;i++){
        for(j=0;j<nb;j++){
            if(samename(seqname[i], nom[j])){
                curtree[i]=ttree[j];
                sortedlgbp[i]=lgbp[j];
                break;
            }
        }
    }

    /* internal branches with high support must not be crossed */
    for(i=0;i<nb-3;i++)
        if(bootvals[i]>opt->SH_MAXBOOTCROSSED) solid[i]=1;

    ctree_noblbs(ctree, ctreenew_nobl, strlen(ctree));

    /* EVALUATE INITIAL TREE */

    if(print1)
        printf("\nEvaluating initial tree : \n%s\n", ctreenew_nobl);
    if(print2) printf("\n");

    maxlkh=maxlike(nb, seq, seqname, curtree, lgbi, sortedlgbp, nbbi, l1, list1, l2, list2, opt, NULL, NULL, NULL);

    if(opt->SH_RESTART) save_current_best("current_best_tree", ctreenew_nobl, maxlkh, 1);
    if(opt->SH_RESTART) save_evaluated("evaluated_trees", ctreenew_nobl, maxlkh, 1);

    /* ALLOCATE SHAKE VARIABLES */

    nbdclade=2*l1-1;
    nbgclade=2*l2-1;

    dclade=(int**)check_alloc(nbdclade, sizeof(int*));
    for(i=0;i<nbdclade;i++) dclade[i]=(int*)check_alloc(nb, sizeof(int));
    gclade=(int**)check_alloc(2*nb, sizeof(int*));
    for(i=0;i<nbgclade;i++) gclade[i]=(int*)check_alloc(nb, sizeof(int));
    newdclade=(int**)check_alloc(nbdclade, sizeof(int*));
    for(i=0;i<nbdclade;i++) newdclade[i]=(int*)check_alloc(nb, sizeof(int));
    newgclade=(int**)check_alloc(nbgclade, sizeof(int*));
    for(i=0;i<nbgclade;i++) newgclade[i]=(int*)check_alloc(nb, sizeof(int));

    ddist=(int**)check_alloc(nbdclade, sizeof(int*));
    gdist=(int**)check_alloc(nbgclade, sizeof(int*));
    for(i=0;i<nbdclade;i++) ddist[i]=(int*)check_alloc(nbdclade, sizeof(int));
    for(i=0;i<nbgclade;i++) gdist[i]=(int*)check_alloc(nbgclade, sizeof(int));

    dcladename=(char**)check_alloc(nbdclade, sizeof(char*));
    gcladename=(char**)check_alloc(nbgclade, sizeof(char*));
    for(i=0;i<nbdclade;i++) dcladename[i]=(char*)check_alloc(nb*(MAXLNAME+3)+1, sizeof(char));
    for(i=0;i<nbgclade;i++) gcladename[i]=(char*)check_alloc(nb*(MAXLNAME+3)+1, sizeof(char));

    /* capacity of the list of evaluated topologies */
    nblistmax=nb*nb;
    if(nblistmax<MIN_NBLISTMAX) nblistmax=MIN_NBLISTMAX;
    if(nblistmax<nb_eval_input) nblistmax=nb_eval_input;
    if(nblistmax>MAX_NBLISTMAX) nblistmax=MAX_NBLISTMAX;

    /* allocate the list; on failure release everything and halve its size */
    while(1){
        lliste=(nblistmax*(nb-3)+lmot-1)/lmot;
        list_tree=(int**)check_alloc(nb, sizeof(int*));
        for(i=0;i<nb;i++){
            list_tree[i]=(int*)calloc(lliste, sizeof(int));
            if(list_tree[i]==NULL) break;
        }
        if(i==nb) break;
        for(j=0;j<i;j++) free(list_tree[j]);
        free(list_tree);
        nblistmax/=2;
        if(nblistmax==0){
            printf("Not enough memory\n");
            exit(EXIT_FAILURE);
        }
    }

    /* SET LIST OF EVALUATED TREES */

    if(eval_input){
        nblist=0;
        for(k=0;k<nb_eval_input;k++){
            /* fresh rows: the previous ones are still referenced by curtree */
            for(i=0;i<=nb;i++) ttree[i]=check_alloc(nb, sizeof(int));
            nb2=ctot(eval_input[k], ttree, NULL, NULL, NULL, nom2, &racine, NULL);
            if(racine=='r')
                unroot(ttree, nb, NULL, NULL, NULL, nom2, NULL, NULL, NULL, NULL, NULL);
            else{ printf("Evaluated trees must be rooted\n"); exit(EXIT_FAILURE);}
            for(i=0;i<nb;i++){
                for(j=0;j<nb;j++){
                    if(samename(seqname[i], nom2[j])){
                        evaltree[i]=ttree[j];
                        break;
                    }
                }
            }
            if(deja_evalue(evaltree, nb, list_tree, nblist, nblistmax)) continue;
            addtolist(evaltree, nb, list_tree, nblist, nblistmax);
            nblist++;
        }
        printf("%ld already evaluated topologies loaded\n", nblist);
    }
    else{
        addtolist(curtree, nb, list_tree, 0, nblistmax);
        nblist=1;
    }

    /* SHAKE */

    treedeb=ctreenew;
    nochange=1;
    movedist=0;
    if(opt->SH_G>0 && opt->SH_G<nb-3) maxmovedist=opt->SH_G;
    else maxmovedist=nb-3;

    if(print1)
        printf("\nStarting rearrangements\n");

    do{
        /* widen the move distance after a fruitless pass, reset it to 1
           after an improvement */
        oldmovedist=movedist;
        if(nochange==1) movedist++;
        else movedist=1;
        if(print1 && !(nochange==0 && oldmovedist==1)){
            printf("\nCrossing %d internal branch", movedist);
            if(movedist>1) printf("es");
            printf("\n");
        }
        nochange=1;

        /* moves among the clades of the first root side (dclade) */
        while(1){
            setclades(curtree, nb, seqname, l1, list1, l2, list2, dclade, gclade, ddist, gdist, nbdclade, nbgclade, dcladename, gcladename);
            restart_d=0;
            for(i=0;i<nbdclade;i++){
                for(j=0;j<nbdclade;j++){
                    if(abs(ddist[i][j])!=movedist) continue;
                    if(ddist[i][j]<0) continue;
                    moveclade(dclade, newdclade, nbdclade, nb, ddist, i, j);
                    settree(newdclade, nbdclade, gclade, nbgclade, nb, newtree);
                    if(deja_evalue(newtree, nb, list_tree, nblist, nblistmax)) continue;
                    if(solid_branch_crossed(newtree, curtree, nb, solid)) continue;
                    addtolist(newtree, nb, list_tree, nblist, nblistmax);
                    nblist++;
                    if(print1){
                        printf("\nMoving %s toward %s\n", dcladename[i], dcladename[j]);
                    }
                    if(print2) printf("\n");
                    lkh=maxlike(nb, seq, seqname, newtree, NULL, NULL, nbbi, l1, list1, l2, list2, opt, ctreenew, NULL, NULL);
                    if(lkh>maxlkh){
                        ctree_noblbs(ctreenew, ctreenew_nobl, strlen(ctreenew));
                        if(print1)
                            printf("New tree is optimal : \n%s\n\nRestarting rearrangements\n", ctreenew_nobl);
                        maxlkh=lkh;
                        copytree(newtree, curtree, nb);
                        if(opt->SH_RESTART) save_current_best("current_best_tree", ctreenew_nobl, lkh, 0);
                        if(opt->SH_RESTART) save_evaluated("evaluated_trees", ctreenew_nobl, lkh, 0);
                        restart_d=1;
                        nochange=0;
                        /* wipe the tree-string buffer for the next call */
                        ctreenew=treedeb;
                        while(*ctreenew) {*ctreenew=0; ctreenew++; }
                        ctreenew=treedeb;
                        break;
                    }
                    else{
                        if(print1) printf("No improvement:\n");
                        ctree_noblbs(ctreenew, ctreenew_nobl, strlen(ctreenew));
                        if(print1) printf("%s\n", ctreenew_nobl);
                        if(opt->SH_RESTART) save_evaluated("evaluated_trees", ctreenew_nobl, lkh, 0);
                    }
                    /* wipe the tree-string buffer for the next call */
                    ctreenew=treedeb;
                    while(*ctreenew) {*ctreenew=0; ctreenew++; }
                    ctreenew=treedeb;
                }
                if(restart_d) break;
            }
            /* at distance 1 an improvement re-scans this side immediately */
            if(!restart_d || movedist>1) break;
        }

        /* improvement at distance > 1: restart the whole pass at distance 1 */
        if(restart_d && movedist>1) continue;

        /* moves among the clades of the other root side (gclade) */
        while(1){
            setclades(curtree, nb, seqname, l1, list1, l2, list2, dclade, gclade, ddist, gdist, nbdclade, nbgclade, dcladename, gcladename);
            restart_g=0;
            for(i=0;i<nbgclade;i++){
                for(j=0;j<nbgclade;j++){
                    if(abs(gdist[i][j])!=movedist) continue;
                    if(gdist[i][j]<0) continue;
                    moveclade(gclade, newgclade, nbgclade, nb, gdist, i, j);
                    settree(dclade, nbdclade, newgclade, nbgclade, nb, newtree);
                    if(deja_evalue(newtree, nb, list_tree, nblist, nblistmax)) continue;
                    if(solid_branch_crossed(newtree, curtree, nb, solid)) continue;
                    addtolist(newtree, nb, list_tree, nblist, nblistmax);
                    nblist++;
                    if(print1){
                        printf("\nMoving %s toward %s\n", gcladename[i], gcladename[j]);
                    }
                    if(print2){
                        printf("\n");
                    }
                    lkh=maxlike(nb, seq, seqname, newtree, NULL, NULL, nbbi, l1, list1, l2, list2, opt, ctreenew, NULL, NULL);
                    if(lkh>maxlkh){
                        ctree_noblbs(ctreenew, ctreenew_nobl, strlen(ctreenew));
                        if(print1)
                            printf("New tree is optimal : \n%s\n\nRestarting rearrangements\n", ctreenew_nobl);
                        maxlkh=lkh;
                        copytree(newtree, curtree, nb);
                        if(opt->SH_RESTART) save_current_best("current_best_tree", ctreenew_nobl, lkh, 0);
                        if(opt->SH_RESTART) save_evaluated("evaluated_trees", ctreenew_nobl, lkh, 0);
                        restart_g=1;
                        nochange=0;
                        /* wipe the tree-string buffer for the next call */
                        ctreenew=treedeb;
                        while(*ctreenew) {*ctreenew=0; ctreenew++; }
                        ctreenew=treedeb;
                        break;
                    }
                    else{
                        if(print1) printf("No improvement:\n");
                        ctree_noblbs(ctreenew, ctreenew_nobl, strlen(ctreenew));
                        if(print1) printf("%s\n", ctreenew_nobl);
                        if(opt->SH_RESTART) save_evaluated("evaluated_trees", ctreenew_nobl, lkh, 0);
                    }
                    /* wipe the tree-string buffer for the next call */
                    ctreenew=treedeb;
                    while(*ctreenew) {*ctreenew=0; ctreenew++; }
                    ctreenew=treedeb;
                }
                if(restart_g) break;
            }
            if(!restart_g || movedist>1) break;
        }

    /* movedist grows by one per fruitless pass, so `<` stops exactly where
       `!=` did whenever maxmovedist >= 1, and also stops when it is < 1 */
    } while(nochange==0 || movedist<maxmovedist);

    /* FINAL EVALUATION */

    if(print1)
        printf("\nFinal evaluation\n");
    if(print2) printf("\n");

    opt->print=trueprint;

    maxlkh=maxlike(nb, seq, seqname, curtree, NULL, NULL, nbbi, l1, list1, l2, list2, opt, NULL, ctree1, ctree2);

    if(ctree1){
        outfile1=fopen("treefile.eqgc", "w");
        outfile2=fopen("treefile.ndgc", "w");
        if(outfile1==NULL || outfile2==NULL){
            printf("Cannot write tree file\n");
            exit(EXIT_FAILURE);
        }
        fprintf(outfile1, "%s\n", ctree1);
        fprintf(outfile2, "%s\n", ctree2);
        fclose(outfile1);
        fclose(outfile2);
        if(print1){
            printf("Tree is written into files : treefile.eqgc (equilibrium G+C content)\n");
            printf(" treefile.ndgc (G+C content at each node)\n\n");
        }
    }

    free(lgbp);
    free(sortedlgbp);
    free(lgbi);
    free(bootvals);
    for(i=0;i<nb;i++){
        free(nom[i]);
        free(ttree[i]);
    }
    free(list1);
    free(list2);
    free(curtree);

    return maxlkh;
}