static double aterms_theta(double mya, void *mydata) { int i, t; double val = 0; #ifdef A_DEBUG float save_a = ddC.a_theta->a; double like; #endif S_remake(ddC.a_theta, mya); for (i=0; i<ddN.DT; i++) { for (t=0; t<ddN.T; t++) { if ( ddS.n_dt[i][t]>1 ) { val += S_S(ddC.a_theta,ddS.n_dt[i][t],ddS.c_dt[i][t]); } } val += poch(ddP.b_theta, mya, ddS.C_dT[i]); } myarms_evals++; #ifdef A_DEBUG yap_message("Eval aterms_theta(%lf) = %lf (S had %f)", mya, val, save_a); ddP.a_theta = mya; cache_update("at"); like = likelihood(); if ( last_val != 0 ) { yap_message(", lp=%lf diffs=%lf vs %lf\n", like, val-last_val, like-last_like); } last_like = like; last_val = val; #endif return val; }
static double aterms_mu(double mya, void *mydata) { int e, t; double val = 0; #ifdef A_DEBUG float save_a = ddC.a_mu->a; double like; #endif S_remake(ddC.a_mu, mya); for (e=0; e<ddN.E; e++) { for (t=0; t<ddN.T; t++) { if ( ddS.cp_et[e][t]==0 ) continue; if (e==ddN.E-1) val += S_S(ddC.a_mu, ddS.C_eDt[e][t], ddS.cp_et[e][t]); else val += S_S(ddC.a_mu, ddS.C_eDt[e][t] + ddS.cp_et[e+1][t], ddS.cp_et[e][t]); } val += poch(ddP.b_mu[e], mya, ddS.Cp_e[e]); } myarms_evals++; #ifdef A_DEBUG yap_message("Eval aterms_mu(%lf) = %lf (S had %f)", mya, val, save_a); ddP.a_mu = mya; cache_update("am"); like = likelihood(); if ( last_val != 0 ) { yap_message(", lp=%lf diffs=%lf vs %lf\n", like, val-last_val, like-last_like); } last_like = like; last_val = val; #endif return val; }
static double aterms_phi0(double mya, void *mydata) { int v; double val = 0; #ifdef A_DEBUG float save_a = ddC.a_phi0->a; double like; #endif S_remake(ddC.a_phi0, mya); val += poch(ddP.b_phi0, mya, ddS.S_0_nz); for (v=0; v<ddN.W; v++) { if ( ddS.S_0vT[v]>1 ) val += S_S(ddC.a_phi0, ddS.S_0vT[v], 1); } myarms_evals++; #ifdef A_DEBUG yap_message("Eval aterms_phi0(%lf) = %lf (S had %f)", mya, val, save_a); ddP.a_phi0 = mya; cache_update("ap0"); like = likelihood(); if ( last_val != 0 ) { yap_message(", lp=%lf diffs=%lf vs %lf\n", like, val-last_val, like-last_like); } last_like = like; last_val = val; #endif return val; }
static double aterms(double mya, void *mydata) { int i, t; double val = 0; double la = log(mya); #ifdef A_DEBUG float save_a = ddC.SX->a; double like; #endif S_remake(ddC.SX, mya); for (i=0; i<ddN.DT; i++) { uint32_t Td_ = 0; for (t=0; t<ddN.T; t++) { Td_ += ddS.Tdt[i][t]; if ( ddS.Ndt[i][t]>1 ) { val += S_S(ddC.SX,ddS.Ndt[i][t],ddS.Tdt[i][t]); } } val += Td_*la + lgamma(ddP.bpar/mya+Td_) - lgamma(ddP.bpar/mya); } myarms_evals++; #ifdef A_DEBUG yap_message("Eval aterms(%lf) = %lf (S had %f)", mya, val, save_a); ddP.apar = mya; cache_update("a"); like = likelihood(); if ( last_val != 0 ) { yap_message(", lp=%lf diffs=%lf vs %lf\n", like, val-last_val, like-last_like); } last_like = like; last_val = val; #endif return val; }
static double awterms(double myaw, void *mydata) { int i, t; double val = 0; double law = log(myaw); #ifdef A_DEBUG float save_a = ddC.SY->a; double like; #endif S_remake(ddC.SY, myaw); for (t=0; t<ddN.T; t++) { uint32_t Tw_ = 0; for (i=0; i<ddN.W; i++) { Tw_ += ddS.Twt[i][t]; if ( ddS.Nwt[i][t]>1 ) { val += S_S(ddC.SY,ddS.Nwt[i][t],ddS.Twt[i][t]); } } val += Tw_*law + lgamma(ddP_bwpar(t)/myaw+Tw_) - lgamma(ddP_bwpar(t)/myaw); } myarms_evals++; #ifdef A_DEBUG yap_message("Eval awterms(%lf) = %lf (S had %f)", myaw, val, save_a); ddP.awpar = myaw; cache_update("aw"); like = likelihood(); if ( last_val != 0 ) { yap_message(", lp=%lf diffs=%lf vs %lf\n", like, val-last_val, like-last_like); } last_like = like; last_val = val; #endif return val; }
static double aterms_phi1(double mya, void *mydata) { int e, t, v; double val = 0; #ifdef A_DEBUG float save_a = ddC.a_phi1->a; double like; #endif S_remake(ddC.a_phi1, mya); for (e=0; e<ddN.E; e++) { for (t=0; t<ddN.T; t++) { if ( ddS.S_Vte[t][e]==0 ) continue; val += poch(ddP.b_phi[e][t], mya, ddS.S_Vte[t][e]); for (v=0; v<ddN.W; v++) { if ( ddS.s_vte[v][t][e]==0 ) continue; if (e<ddN.E-1) { val += S_S(ddC.a_phi1, ddS.m_vte[v][t][e] + ddS.s_vte[v][t][e+1] , ddS.s_vte[v][t][e]); } else { val += S_S(ddC.a_phi1, ddS.m_vte[v][t][e], ddS.s_vte[v][t][e]); } } } } myarms_evals++; #ifdef A_DEBUG yap_message("Eval aterms_phi1(%lf) = %lf (S had %f)", mya, val, save_a); ddP.a_phi1 = mya; cache_update("ap"1); like = likelihood(); if ( last_val != 0 ) { yap_message(", lp=%lf diffs=%lf vs %lf\n", like, val-last_val, like-last_like); } last_like = like; last_val = val; #endif return val; }
static double aterms(double x, void *mydata) { int i, k; ALData *mp = (ALData *)mydata; double val = 0; if ( x<=0 ) { fprintf(stderr,"Illegal discount value in aterms()\n"); exit(1); } if ( mp->verbose>1 ) { fprintf(stderr,"Extending S for M=%d a=%lf\n", mp->maxt,x); } if ( mp->S ) S_remake(mp->S,x); else mp->S = S_make(mp->maxn,mp->maxt,mp->maxn,mp->maxt,x,S_STABLE); if ( !mp->S ) { fprintf(stderr,"Out of memory for S table\n"); exit(1); } for (i=0; i<mp->I; i++) { val += mp->T[i] * log(x) + lgamma(mp->T[i]+mp->bpar[i]/x) - lgamma(mp->bpar[i]/x); if ( mp->val ) { for (k=0; k<mp->K[i]; k++) { scnt_int n; stcnt_int t; mp->val(&n, &t, i, k); if ( n>1 ) val += S_S(mp->S, n, t); } } else { for (k=0; k<mp->K[i]; k++) if ( mp->n[i][k]>1 ) val += S_S(mp->S, mp->n[i][k],mp->t[i][k]); } } return val; }
/* * fills S table */ void S_make(int maxN, int maxM, double a) { int N, M; usedN = maxN; usedM = maxM; S_m = malloc(sizeof(S_m[0])*(maxM+1)); S_m[0] = NULL; for (M=1; M<=maxM; M++) { S_m[M] = malloc(sizeof(S_m[0][0])*maxN); } /* * all values outside bounds to log(0); * when N=M set to log(1) */ for (N=1; N<maxN; N++) { if ( N<=maxM ) tblSNM(N,N) = 0; } S_m[1][1] = 0; S_remake(a); }
int main(int argc, char* argv[]) { int i, j, c, iter, ITER=200; unsigned long int seed=0; int bcycle = 0; float bstart = 0; int acycle = 0; float astart = 0; int burnin = 0; stable_t *ST = NULL; int useN = DIM*2; MAXN = 1; MAXT = MAXSTAB; /* * default values for args */ while ( (c=getopt(argc, argv,"a:b:B:C:I:hH:I:N:P:S:s:T:v"))>=0 ) { switch ( c ) { case 'h': usage(burnin?burnin:ITER/2, ITER, useN); exit(0); case 'b': if ( !optarg || sscanf(optarg,"%f,%f",&bpar, &bstart)<1 ) yaps_quit("Need a valid 'b' argument\n"); break; case 'a': if ( !optarg || sscanf(optarg,"%f,%f",&apar,&astart)<1 ) yaps_quit("Need a valid 'a' argument\n"); break; case 'H': if ( !optarg || sscanf(optarg,"%d",&bcycle)!=1 ) yaps_quit("Need a valid 'G' argument\n"); break; case 'T': if ( !optarg || sscanf(optarg,"%d",&MAXT)!=1 ) yaps_quit("Need a valid 'T' argument\n"); break; case 'I': if ( !optarg || sscanf(optarg,"%d",&acycle)!=1 ) yaps_quit("Need a valid 'H' argument\n"); break; case 'N': if ( !optarg || sscanf(optarg,"%d",&useN)!=1 ) yaps_quit("Need a valid 'N' argument\n"); break; case 'C': if ( !optarg || sscanf(optarg,"%d",&ITER)!=1 ) yaps_quit("Need a valid 'C' argument\n"); break; case 'B': if ( !optarg || sscanf(optarg,"%d",&burnin)!=1 ) yaps_quit("Need a valid 'B' argument\n"); break; case 's': if ( !optarg || sscanf(optarg,"%lu",&seed)!=1 ) yaps_quit("Need a valid 's' argument\n"); break; case 'v': verbose++; break; #ifdef S_USE_THREADS case 'P': if ( !optarg || sscanf(optarg,"%u",&threads)!=1 ) yaps_quit("Need a valid 'P' argument\n"); break; #endif default: yaps_message("Bad command line argument\n\n"); usage(burnin?burnin:ITER/2, ITER, useN); exit(0); } } if ( useN>=MAXDATA ) yaps_quit("N too large\n"); if ( burnin==0 ) burnin = ITER/2; else if ( burnin>=ITER-1 ) yaps_quit("Burnin %d too large for cycles %d\n", burnin, ITER); yaps_message("Configuration details\n"); yaps_message("=====================\n"); /* * set random number generator */ if ( seed ) { rng_seed(rng,seed); } else { rng_time(rng,&seed); } yaps_message("Setting seed for data = %lu\n", seed); if ( acycle && apar==0 ) apar = 0.5; yaps_message("Setting a=%f, b=%f, N=%d, D=%d\n", apar, bpar, useN, NUMMN); yaps_message(" burnin=%d,", burnin); yaps_message(" cycles=%d\n", ITER); /* * fix pointers */ for (j=0; j<NUMMN; j++) { n[j] = &n_data[j*DIM]; t[j] = &t_data[j*DIM]; tave[j] = &tave_data[j*DIM]; } /* * initialise everything */ for (j=0; j<NUMMN; j++) { N[j] = useN; T[j] = 0; Tave[j] = 0; for (i=0; i<DIM; i++) { n[j][i] = 0; t[j][i] = 0; tave[j][i] = 0; } } /* * fix base distribution, uniform */ { for (i=0; i<DIM; i++) { H[i] = 1.0/DIM; } } /* * create data using a CRP to get initialisation for n[] */ c = 0; for (j=0; j<NUMMN; j++) { int cc; i = sampleH(); data[c++] = i; // first entry always adds a table n[j][i]++; t[j][i]++; T[j]++; for (cc=1; cc<N[j]; cc++) { float val = (cc+bpar)*rng_unit(rng); val -= T[j]*apar+bpar; if ( val<=0 ) { // new table i = sampleH(); t[j][i]++; T[j]++; } else { for (i=0; i<DIM; i++) { val -= n[j][i] - t[j][i]*apar; if ( val<0 ) break; } } assert(i<DIM); n[j][i]++; data[c++] = i; } } binit = bpar; /* * record maximum entries in data * do this where possible so that one can get the table * sizes right * */ MAXN = n[0][0]+1; MAXT = 1; for (j=0; j<NUMMN; j++) { for (i=0; i<DIM; i++) { if ( MAXN<=n[j][i] ) MAXN = n[j][i]+1; if ( MAXT<t[j][i] ) MAXT = t[j][i]*1.1+1; } } if ( MAXT>MAXN ) MAXT = MAXN; yaps_message("Making S for N=%d M=%d a=%lf\n", MAXN,MAXT,apar); ST = S_make(MAXN, MAXT, MAXN, MAXTAB, apar, S_STABLE | S_UVTABLE); if ( ST==NULL ) yaps_quit("Making S failed!\n"); S_report(ST,stdout); /* * the seed only sets the data/sample, * the seed for the simulation/Gibbs is always random */ rng_free(rng); rng_time(rng,&seed); //yaps_message("Resetting seed = %lu\n", seed); /* * report on initial data statistics */ yaps_message("\nData sampled\n"); yaps_message("============\n"); for (j=0; j<NUMMN; j++) { yaps_message("n[%d] =", j); for (i=0; i<DIM; i++) yaps_message(" %d", n[j][i]); yaps_message(" = %d\n", N[j]); yaps_message("t[%d] =",j); for (i=0; i<DIM; i++) yaps_message(" %d", t[j][i]); yaps_message(" = %d\n", T[j]); } /* * set the hyperparameters used in Gibbs, * can be different to data */ if ( bstart==0 ) bstart = bpar; if ( astart==0 ) astart = apar; // initialise latent stats and reporting info for (j=0; j<NUMMN; j++) { T[j] = 0; Tave[j] = 0; } tcnt = 0; bave = 0; bcnt = 0; aave = 0; acnt = 0; bpar = bstart; if ( verbose && bcycle!=0 ) yaps_message("Starting with initial b=%f\n", bpar); apar = astart; if ( verbose && acycle!=0 ) yaps_message("Starting with initial a=%f\n", apar); for (j=0; j<NUMMN; j++) { for (i=0; i<DIM; i++) { tave[j][i] = 0; t[j][i] = 0; if ( n[j][i]>0 ) { /* * initialise to a single table */ t[j][i] = 1; T[j]++; } } } for ( iter=0; iter<ITER; iter++) { /* * sampling with table indicators */ c = 0; for (j=0; j<NUMMN; j++) { int cc; for (cc=0; cc<N[j]; cc++) { float one; i = data[c++]; assert(n[j][i]); if ( n[j][i]==1 ) // this indicator must always be 1, no sampling continue; // sample whether it contributes to a table if ( t[j][i]>1 && (n[j][i]-1)*rng_unit(rng)<(t[j][i]-1) ) { t[j][i]--; T[j]--; } assert(t[j][i]<n[j][i]); // sample new table indicator one = H[i] * (bpar + T[j]*apar) * (t[j][i]) / (n[j][i]-t[j][i]+1) * S_V(ST, n[j][i],t[j][i]+1); if ( rng_unit(rng) < one/(one+1.0) ) { t[j][i]++; T[j]++; } } } /* * one major cycle of Gibbs sampler finished */ if ( verbose>1 ) { for (j=0; j<NUMMN; j++) { for (i=0; i<DIM; i++) printf(" %d", t[j][i]); printf(" = %d\n", T[j]); } } /* * sample & record b */ if ( bcycle!=0 && iter%bcycle==0 ) { // Gibbs on bpar (concentration par) too if ( bcycle<0 ) { int bc = -bcycle; for (bc-- ; bc>0; bc--) bpar = sampleb(bpar, 1, PB_shape, PB_scale, N, T, apar, rng, 1, 1); } bpar = sampleb(bpar, 1, PB_shape, PB_scale, N, T, apar, rng, 1, 1); if ( iter>=burnin ) { bave += bpar; bcnt ++; } } /* * sample & record a */ if ( acycle!=0 && iter%acycle==0 ) { int dimI[NUMMN]; double dimb[NUMMN]; for (j=0; j<NUMMN; j++) { dimI[j] = DIM; dimb[j] = bpar; } // Gibbs on apar (discount par) too if ( acycle<0 ) { int bc = -acycle; for (bc-- ; bc>0; bc--) apar = samplea(apar, NUMMN, dimI, T, n, t, NULL, dimb, rng, 1, 1); } apar = samplea(apar, NUMMN, dimI, T, n, t, NULL, dimb, rng, 1, 1); if ( iter>=burnin ) { aave += apar; acnt ++; } if ( verbose>1 ) yaps_message("Extending S for a=%lf\n", apar); if ( S_remake(ST,apar) ) yaps_message("Extending S failed\n"); } /* * full statistics collection */ if ( iter>=burnin ) { for (j=0; j<NUMMN; j++) { for (i=0; i<DIM; i++) { tave[j][i] += t[j][i]; } Tave[j] += T[j]; } tcnt ++; } } /* * report for this experiment */ yaps_message("\nEstimates\n"); yaps_message("=========\n"); for (j=0; j<NUMMN; j++) { yaps_message("t[%d] = ", j); for (i=0; i<DIM; i++) yaps_message(" %.2f", tave[j][i]/tcnt); yaps_message("\nT[%d]=%.2f\n", j, Tave[j]/tcnt); } if ( bcycle!=0 && bcnt>0 ) yaps_message("\nb=%.2f", bave/bcnt); if ( acycle!=0 && acnt>0 ) yaps_message("\na=%.3f", aave/acnt); yaps_message("\n"); S_free(ST); rng_free(rng); return 0; }
stable_t *S_make(unsigned initN, unsigned initM, unsigned maxN, unsigned maxM, double a, uint32_t flags) { int N; stable_t *sp = NULL; sp = mymalloc(sizeof(stable_t)); if ( !sp ) return NULL; if ( maxM<10 ) maxM = 10; if ( maxN<maxM ) maxN = maxM; if ( initM<10 ) initM = 10; if ( initN<initM ) initN = initM; if ( initN>maxN ) initN = maxM; if ( initN>maxN ) initN = maxN; if ( (flags&S_STABLE)==0 && (flags&S_UVTABLE)==0 ) return NULL; sp->tag = NULL; sp->memalloced = 0; sp->flags = flags; sp->maxN = maxN; sp->maxM = maxM; sp->usedN = initN; sp->usedM = initM; sp->usedN1 = initN; sp->startM = initM; sp->S = NULL; sp->SfrontN = sp->SfrontM = sp->S1 = NULL; sp->V = NULL; sp->VfrontN = sp->VfrontM = NULL; sp->Sf = sp->Vf = NULL; #ifdef S_USE_THREADS if ( (flags&S_THREADS) ) { // yaps_message("Initialised mutex \n"); pthread_mutex_init(&sp->mutex, NULL); } #endif sp->S1 = mymalloc(sizeof(sp->S1[0])*(initN)); if ( !sp->S1 ) { myfree(sp); return NULL; } if ( flags&S_STABLE ) { if ( flags&S_FLOAT ) { /* * allocate frontier */ sp->SfrontN = mymalloc(sizeof(sp->SfrontN[0])*(initM-1)); if ( !sp->SfrontN ) { S_free(sp); return NULL; } /* * sets diagonal entry of S since the loop writing * SfrontN never does the diagnal itself */ memset(sp->SfrontN,0,sizeof(sp->SfrontN[0])*(initM-1)); sp->SfrontM = mymalloc(sizeof(sp->SfrontM[0])*(initN-initM+1)); if ( !sp->SfrontM ) { S_free(sp); return NULL; } /* * allocate sp->Sf[] as vector of vectors */ sp->Sf = mymalloc(sizeof(sp->Sf[0])*(sp->usedN-2)); if ( !sp->Sf ) { S_free(sp); return NULL; } memset(sp->Sf,0,sizeof(sp->Sf[0])*(sp->usedN-2)); /* * allocate sp->Sf[0][.] to sp->Sf[startM-3][.] in one block */ sp->Sf[0] = mymalloc(sizeof(sp->Sf[0][0])*(sp->startM-1)*(sp->startM-2)/2); if ( !sp->Sf[0] ) { S_free(sp); return NULL; } for (N=1; N<=sp->startM-3; N++) sp->Sf[N] = sp->Sf[N-1] + N; /* * allocate remaining sp->Sf[N][.] as vectors */ assert(sp->startM-2+sp->Sf[sp->startM-3]-sp->Sf[0]== (sp->startM-1)*(sp->startM-2)/2); for (N=sp->startM-2; N<=sp->usedN-3; N++) { sp->Sf[N] = mymalloc(sizeof(sp->Sf[0][0])*(sp->usedM-1)); if ( !sp->Sf[N] ) { S_free(sp); return NULL; } } } else { sp->S = mymalloc(sizeof(sp->S[0])*sp->usedN); if ( !sp->S ) { S_free(sp); return NULL; } /* * allocate sp->S[0][.] to sp->S[startM-3][.] in one block */ sp->S[0] = mymalloc(sizeof(sp->S[0][0])*(sp->startM-1)*(sp->startM-2)/2); if ( !sp->S[0] ) { S_free(sp); return NULL; } for (N=1; N<=sp->startM-3; N++) sp->S[N] = sp->S[N-1] + N; /* * allocate remaining sp->S[N][.] as vectors for N>=usedM+1 * which store values M=2,...,usedM, so need (usedM-1) space */ assert(sp->startM-2+sp->S[sp->startM-3]-sp->S[0]== (sp->startM-1)*(sp->startM-2)/2); for (N=sp->startM-2; N<=sp->usedN-3; N++) { sp->S[N] = mymalloc(sizeof(sp->S[0][0])*(sp->usedM-1)); if ( !sp->S[N] ) { S_free(sp); return NULL; } } } } if ( flags&S_UVTABLE ) { if ( flags&S_FLOAT ) { /* * allocate frontier */ sp->VfrontN = mymalloc(sizeof(sp->VfrontN[0])*(initM-1)); if ( !sp->VfrontN ) { S_free(sp); return NULL; } /* * sets diagonal entry of V since the loop writing * VfrontN never does the diagnal itself */ memset(sp->VfrontN,0,sizeof(sp->VfrontN[0])*(initM-1)); sp->VfrontM = mymalloc(sizeof(sp->VfrontM[0])*(initN-initM+1)); if ( !sp->VfrontM ) { S_free(sp); return NULL; } sp->Vf = mymalloc(sizeof(sp->Vf[0])*sp->usedN); if ( !sp->Vf ) { S_free(sp); return NULL; } /* * allocate sp->Vf[0][.] to sp->Vf[startM-2][.] in one block */ sp->Vf[0] = mymalloc(sizeof(sp->Vf[0][0])*(sp->startM-1)*(sp->startM)/2); if ( !sp->Vf[0] ) { S_free(sp); return NULL; } for (N=1; N<=sp->startM-2; N++) sp->Vf[N] = sp->Vf[N-1] + N; /* * allocate remaining sp->Vf[N][.] as vectors for N>=usedM+1 * which store values M=2,...,usedM, so need (usedM-1) space */ assert(sp->startM-1+sp->Vf[sp->startM-2]-sp->Vf[0]== (sp->startM-1)*(sp->startM)/2); for (N=sp->startM-1; N<=sp->usedN-2; N++) { sp->Vf[N] = mymalloc(sizeof(sp->Vf[0][0])*(sp->usedM-1)); if ( !sp->Vf[N] ) { S_free(sp); return NULL; } } } else { sp->V = mymalloc(sizeof(sp->V[0])*sp->usedN); if ( !sp->V ) { S_free(sp); return NULL; } /* * allocate sp->V[0][.] to sp->V[startM-2][.] in one block */ sp->V[0] = mymalloc(sizeof(sp->V[0][0])*(sp->startM-1)*(sp->startM)/2); if ( !sp->V[0] ) { S_free(sp); return NULL; } for (N=1; N<=sp->startM-2; N++) sp->V[N] = sp->V[N-1] + N; /* * allocate remaining sp->V[N][.] as vectors for N>=usedM+1 * which store values M=2,...,usedM, so need (usedM-1) space */ assert(sp->startM-1+sp->V[sp->startM-2]-sp->V[0]== (sp->startM-1)*(sp->startM)/2); for (N=sp->startM-1; N<=sp->usedN-2; N++) { sp->V[N] = mymalloc(sizeof(sp->V[0][0])*(sp->usedM-1)); if ( !sp->V[N] ) { S_free(sp); return NULL; } } } } /* * this is where we actually build the Stirling numbers */ S_remake(sp,a); return sp; }