示例#1
0
文件: makermt.c 项目: Unode/ext_apps
int genrmt(char *infile, char *outfile)
{
  int i,j;
  FILE *fp;
  double x,t0,t1;
  char *cbuf,*fext;

  /* open file */
  switch(seqmode) {
  case SEQ_MOLPHY: fext=fext_molphy; break;
  case SEQ_PAML: fext=fext_paml; break;
  case SEQ_PAUP: fext=fext_paup; break;
  case SEQ_PUZZLE: fext=fext_puzzle; break;
  case SEQ_PHYML: fext=fext_phyml; break;
  case SEQ_MT: 
  default: fext=fext_mt; break;
  }
  if(infile) {
    fp=openfp(infile,fext,"r",&cbuf);
    printf("\n# reading %s",cbuf);
  } else {
    fp=STDIN;
    printf("\n# reading from stdin");
  }

  /* read file */
  mm=nn=0;
  switch(seqmode) {
  case SEQ_MOLPHY: 
    datmat = fread_mat_lls(fp, &mm, &nn); break;
  case SEQ_PAML: 
    datmat = fread_mat_lfh(fp, &mm, &nn); break;
  case SEQ_PAUP: 
    datmat = fread_mat_paup(fp, &mm, &nn); break;
  case SEQ_PUZZLE: 
    datmat = fread_mat_puzzle(fp, &mm, &nn); break;
  case SEQ_PHYML: 
    datmat = fread_mat_phyml(fp, &mm, &nn); break;
  case SEQ_MT: 
  default: 
    datmat = fread_mat(fp, &mm, &nn); break;  
  }
  if(infile) {fclose(fp);  FREE(cbuf);}
  printf("\n# M:%d N:%d",mm,nn);

  /* allocating buffers */
  datvec=new_vec(mm);
  bn=new_ivec(kk); rr1=new_vec(kk);

  /* calculate the log-likelihoods */
  for(i=0;i<mm;i++) {
    x=0; for(j=0;j<nn;j++) x+=datmat[i][j];
    datvec[i]=x;
  }
  
  /* calculate scales */
  for(i=0;i<kk;i++) {
    bn[i]=(int)(rr[i]*nn); /* sample size for bootstrap */
    rr1[i]=(double)bn[i]/nn; /* recalculate rr for integer adjustment */
  }

  /* open out file */
  if(outfile) {
    /* vt ascii write to file */
    fp=openfp(outfile,fext_vt,"w",&cbuf);
    printf("\n# writing %s",cbuf);
    fwrite_vec(fp,datvec,mm);
    fclose(fp); FREE(cbuf);
    /* rmt binary write to file */
    fp=openfp(outfile,fext_rmt,"wb",&cbuf);
    printf("\n# writing %s",cbuf);
    fwrite_bvec(fp,datvec,mm);
    fwrite_bvec(fp,rr1,kk);
    fwrite_bivec(fp,bb,kk);
    fwrite_bi(fp,kk);
  } else {
    /* rmt ascii write to stdout */
    printf("\n# writing to stdout");
    printf("\n# OBS:\n"); write_vec(datvec,mm);
    printf("\n# R:\n"); write_vec(rr1,kk);
    printf("\n# B:\n"); write_ivec(bb,kk);
    printf("\n# RMAT:\n");
    printf("%d\n",kk);
  }


  /* generating the replicates by resampling*/
  for(i=j=0;i<kk;i++) j+=bb[i];
  printf("\n# start generating total %d replicates for %d items",j,mm);
  fflush(STDOUT);
  t0=get_time();

  for(i=0;i<kk;i++) {
    repmat=new_lmat(mm,bb[i]);
    scaleboot(datmat,repmat,mm,nn,bn[i],bb[i]);
    if(outfile) {
      fwrite_bmat(fp,repmat,mm,bb[i]);
      putdot();
    } else {
      printf("\n## RMAT[%d]:\n",i); write_mat(repmat,mm,bb[i]);
    }
    free_lmat(repmat,mm);
  }

  t1=get_time();
  printf("\n# time elapsed for bootstrap t=%g sec",t1-t0);

  if(outfile) {
    fclose(fp); FREE(cbuf);
  }

  /* freeing buffers */
  free_vec(bn); free_vec(rr1); free_vec(datvec); free_mat(datmat);

  return 0;
}
/*==========================================
 * main
 *========================================== */
/*
 * Entry point: runs the sampler on the corpus in "docword.txt".
 *
 * Command line:  prog T iter seed
 *   T    - number of topics        (must be > 0)
 *   iter - number of sampling sweeps (must be > 0)
 *   seed - seed passed to srand48  (must be > 0)
 *
 * Prints the run parameters and the in-sample perplexity to stdout, then
 * writes the count tables to "Nwt.txt" and "Ndt.txt" and the per-token
 * assignments to "z.txt".  Returns 0 on success; exits with -1 when too
 * few arguments are supplied.
 */
int main(int argc, char* argv[])
{
    int T; // number of topics
    int W; // number of unique words
    int D; // number of docs
    int N; // number of words in corpus

    int i, iter, seed;
    int *w, *d, *z, *order;
    double **Nwt, **Ndt, *Nt;
    double alpha, beta;

    /* All three positional arguments are read below, so every one of them
       must be present; checking only for argc == 1 let a partial command
       line reach atoi() with a NULL or out-of-range pointer. */
    if (argc < 4) {
        fprintf(stderr, "usage: %s T iter seed\n",
                (argc > 0 && argv[0]) ? argv[0] : "program");
        exit(-1);
    }
    T    = atoi(argv[1]);
    assert(T>0);
    iter = atoi(argv[2]);
    assert(iter>0);
    seed = atoi(argv[3]);
    assert(seed>0);

    /* size the per-token arrays from a first pass over the corpus file,
       then load the document/word indices */
    N = countN("docword.txt");
    w = ivec(N);
    d = ivec(N);
    z = ivec(N);
    read_dw("docword.txt", d, w, &D, &W);
    Nwt = dmat(W,T);
    Ndt = dmat(D,T);
    Nt  = dvec(T);

    /* D * T is evaluated in double: for a large corpus the product can
       exceed INT_MAX, and signed overflow is undefined behaviour. */
    alpha = 0.05 * N / ((double)D * T);
    beta  = 0.01;

    printf("seed  = %d\n", seed);
    printf("N     = %d\n", N);
    printf("W     = %d\n", W);
    printf("D     = %d\n", D);
    printf("T     = %d\n", T);
    printf("iter  = %d\n", iter);
    printf("alpha = %f\n", alpha);
    printf("beta  = %f\n", beta);

    srand48(seed);
    randomassignment_d(N,T,w,d,z,Nwt,Ndt,Nt);
    order = randperm(N);

    /* apply the alpha/beta terms to the count tables for the sampling phase */
    add_smooth_d(D,T,Ndt,alpha);
    add_smooth_d(W,T,Nwt,beta);
    add_smooth1d(  T,Nt, W*beta);

    for (i = 0; i < iter; i++) {
        sample_chain_d(N,W,T,w,d,z,Nwt,Ndt,Nt,order);
        printf("iter %d \n", i);
    }

    printf("In-Sample Perplexity = %.2f\n",pplex_d(N,W,T,w,d,Nwt,Ndt));

    /* apply the negated terms again before the tables are written out */
    add_smooth_d(D,T,Ndt,-alpha);
    add_smooth_d(W,T,Nwt,-beta);
    add_smooth1d(  T,Nt, -W*beta);

    write_sparse_d(W,T,Nwt,"Nwt.txt");
    write_sparse_d(D,T,Ndt,"Ndt.txt");
    write_ivec(N,z,"z.txt");

    return 0;
}