int genrmt(char *infile, char *outfile) { int i,j; FILE *fp; double x,t0,t1; char *cbuf,*fext; /* open file */ switch(seqmode) { case SEQ_MOLPHY: fext=fext_molphy; break; case SEQ_PAML: fext=fext_paml; break; case SEQ_PAUP: fext=fext_paup; break; case SEQ_PUZZLE: fext=fext_puzzle; break; case SEQ_PHYML: fext=fext_phyml; break; case SEQ_MT: default: fext=fext_mt; break; } if(infile) { fp=openfp(infile,fext,"r",&cbuf); printf("\n# reading %s",cbuf); } else { fp=STDIN; printf("\n# reading from stdin"); } /* read file */ mm=nn=0; switch(seqmode) { case SEQ_MOLPHY: datmat = fread_mat_lls(fp, &mm, &nn); break; case SEQ_PAML: datmat = fread_mat_lfh(fp, &mm, &nn); break; case SEQ_PAUP: datmat = fread_mat_paup(fp, &mm, &nn); break; case SEQ_PUZZLE: datmat = fread_mat_puzzle(fp, &mm, &nn); break; case SEQ_PHYML: datmat = fread_mat_phyml(fp, &mm, &nn); break; case SEQ_MT: default: datmat = fread_mat(fp, &mm, &nn); break; } if(infile) {fclose(fp); FREE(cbuf);} printf("\n# M:%d N:%d",mm,nn); /* allocating buffers */ datvec=new_vec(mm); bn=new_ivec(kk); rr1=new_vec(kk); /* calculate the log-likelihoods */ for(i=0;i<mm;i++) { x=0; for(j=0;j<nn;j++) x+=datmat[i][j]; datvec[i]=x; } /* calculate scales */ for(i=0;i<kk;i++) { bn[i]=(int)(rr[i]*nn); /* sample size for bootstrap */ rr1[i]=(double)bn[i]/nn; /* recalculate rr for integer adjustment */ } /* open out file */ if(outfile) { /* vt ascii write to file */ fp=openfp(outfile,fext_vt,"w",&cbuf); printf("\n# writing %s",cbuf); fwrite_vec(fp,datvec,mm); fclose(fp); FREE(cbuf); /* rmt binary write to file */ fp=openfp(outfile,fext_rmt,"wb",&cbuf); printf("\n# writing %s",cbuf); fwrite_bvec(fp,datvec,mm); fwrite_bvec(fp,rr1,kk); fwrite_bivec(fp,bb,kk); fwrite_bi(fp,kk); } else { /* rmt ascii write to stdout */ printf("\n# writing to stdout"); printf("\n# OBS:\n"); write_vec(datvec,mm); printf("\n# R:\n"); write_vec(rr1,kk); printf("\n# B:\n"); write_ivec(bb,kk); printf("\n# RMAT:\n"); printf("%d\n",kk); } /* generating the replicates 
by resampling*/ for(i=j=0;i<kk;i++) j+=bb[i]; printf("\n# start generating total %d replicates for %d items",j,mm); fflush(STDOUT); t0=get_time(); for(i=0;i<kk;i++) { repmat=new_lmat(mm,bb[i]); scaleboot(datmat,repmat,mm,nn,bn[i],bb[i]); if(outfile) { fwrite_bmat(fp,repmat,mm,bb[i]); putdot(); } else { printf("\n## RMAT[%d]:\n",i); write_mat(repmat,mm,bb[i]); } free_lmat(repmat,mm); } t1=get_time(); printf("\n# time elapsed for bootstrap t=%g sec",t1-t0); if(outfile) { fclose(fp); FREE(cbuf); } /* freeing buffers */ free_vec(bn); free_vec(rr1); free_vec(datvec); free_mat(datmat); return 0; }
/*==========================================
 * main
 *
 * usage: <program> T iter seed
 *
 *   T     number of topics   (must be > 0)
 *   iter  number of sampling iterations (must be > 0)
 *   seed  seed passed to srand48()      (must be > 0)
 *
 * Reads the corpus from "docword.txt", runs `iter` passes of
 * sample_chain_d(), prints the in-sample perplexity and writes
 * "Nwt.txt", "Ndt.txt" and "z.txt".
 *
 * Prints the usage line and exits with status -1 when fewer than three
 * arguments are given; arguments beyond the third are ignored.
 * Returns 0 on completion.
 *========================================== */
int main(int argc, char* argv[])
{
  int T; // number of topics
  int W; // number of unique words
  int D; // number of docs
  int N; // number of words in corpus

  int i, iter, seed;
  int *w, *d, *z, *order;
  double **Nwt, **Ndt, *Nt;
  double alpha, beta;

  /* All three positional arguments are read below, so all three must be
     present: with fewer, argv[2] or argv[3] would be a null pointer (or
     out of bounds) and atoi() on it is undefined behaviour. */
  if (argc < 4) {
    fprintf(stderr, "usage: %s T iter seed\n", argv[0]);
    exit(-1);
  }

  T = atoi(argv[1]);
  assert(T>0);
  iter = atoi(argv[2]);
  assert(iter>0);
  seed = atoi(argv[3]);
  assert(seed>0);

  /* load the corpus: one entry of w, d and z per word token */
  N = countN("docword.txt");
  w = ivec(N);
  d = ivec(N);
  z = ivec(N);
  read_dw("docword.txt", d, w, &D, &W);

  Nwt = dmat(W,T);
  Ndt = dmat(D,T);
  Nt  = dvec(T);

  /* The denominator is computed in double so that D*T cannot overflow
     int; the value is unchanged whenever the int product would have
     fit. */
  alpha = 0.05 * N / ((double)D * T);
  beta  = 0.01;

  printf("seed  = %d\n", seed);
  printf("N     = %d\n", N);
  printf("W     = %d\n", W);
  printf("D     = %d\n", D);
  printf("T     = %d\n", T);
  printf("iter  = %d\n", iter);
  printf("alpha = %f\n", alpha);
  printf("beta  = %f\n", beta);

  srand48(seed);
  randomassignment_d(N,T,w,d,z,Nwt,Ndt,Nt);
  order = randperm(N);

  /* add the smoothing terms to the count arrays before sampling */
  add_smooth_d(D,T,Ndt,alpha);
  add_smooth_d(W,T,Nwt,beta);
  add_smooth1d(  T,Nt, W*beta);

  for (i = 0; i < iter; i++) {
    sample_chain_d(N,W,T,w,d,z,Nwt,Ndt,Nt,order);
    printf("iter %d \n", i);
  }

  printf("In-Sample Perplexity = %.2f\n",pplex_d(N,W,T,w,d,Nwt,Ndt));

  /* same calls with negated amounts - presumably this removes the
     smoothing again so that the written files hold the raw counts */
  add_smooth_d(D,T,Ndt,-alpha);
  add_smooth_d(W,T,Nwt,-beta);
  add_smooth1d(  T,Nt, -W*beta);

  write_sparse_d(W,T,Nwt,"Nwt.txt");
  write_sparse_d(D,T,Ndt,"Ndt.txt");
  write_ivec(N,z,"z.txt");

  return 0;
}