double S_S(stable_t *sp, unsigned N, unsigned T) { if ( (sp->flags & S_STABLE)==0 ) return -INFINITY; if ( N==T ) return 0; if ( T==1 ) return S_S1(sp, N); if ( N<T || T==0 ) return -INFINITY; if ( T>sp->usedM || N>sp->usedN ) { if ( N>sp->maxN || T>sp->maxM ) { if ( (sp->flags & S_QUITONBOUND) ) if ( sp->tag ) yaps_quit("S_S(%u,%u,%lf) tagged '%s' hit bounds\n",N,T,sp->a, sp->tag); else yaps_quit("S_S(%u,%u,%lf) hit bounds\n",N,T,sp->a); else return -INFINITY; } if ( S_extend(sp,N+1,T+1) ) yaps_quit("S_extend() out of memory\n"); } if ( sp->flags&S_FLOAT ) { assert(sp->Sf); assert(sp->Sf[N-3]); return sp->Sf[N-3][T-2]; } assert(sp->S); assert(sp->S[N-3]); return sp->S[N-3][T-2]; }
double S_V(stable_t *sp, unsigned n, unsigned m) { if ( (sp->flags & S_UVTABLE)==0 ) return 0; if ( m>=sp->usedM-1 || n>=sp->usedN-1 ) { if ( n>sp->maxN || m>sp->maxM ) { if ( (sp->flags & S_QUITONBOUND) ) { assert(n>sp->maxN || m>sp->maxM); if ( sp->tag ) yaps_quit("S_V(%u,%u,%lf) tagged '%s' hit bounds (%u,%u)\n",n,m,sp->a, sp->tag, sp->maxN, sp->maxM); else yaps_quit("S_V(%u,%u,%lf) hit bounds\n",n,m,sp->a); } else return 0; } // yaps_message("S_V(%s,%d,%d) calling extend\n", sp->tag, n, m); if ( S_extend(sp,n+1,m+1) ) yaps_quit("S_extend() out of memory\n"); } assert(m>=2); if ( n<m ) return 0; if ( sp->flags & S_FLOAT ) { assert(sp->Vf); if ( sp->Vf[n-2]==NULL ) yaps_quit("S_V(%s,%u,%u) Vf memory unavailable\n", sp->tag, n, m); assert(sp->Vf[n-2]); return sp->Vf[n-2][m-2]; } assert(sp->V); assert(sp->V[n-2]); return sp->V[n-2][m-2]; }
double S_U(stable_t *sp, unsigned n, unsigned m) { if ( m==1 ) return n - sp->a; if ( m<=1 ) yaps_quit("Bad constraints in S_U(%s,%u,%u)\n", sp->tag, n, m); assert(m>1); return n - m*sp->a + 1/S_V(sp,n,m); }
int main(int argc, char* argv[]) { int c; int n, t; stable_t *S; /* * default values for args */ while ( (c=getopt(argc, argv,"ha:N:T:"))>=0 ) { switch ( c ) { case 'a': if ( !optarg || sscanf(optarg,"%f",&apar)!=1 ) yaps_quit("Need a valid 'a' argument\n"); break; case 'N': if ( !optarg || sscanf(optarg,"%d",&N)!=1 ) yaps_quit("Need a valid 'N' argument\n"); break; case 'T': if ( !optarg || sscanf(optarg,"%d",&T)!=1 ) yaps_quit("Need a valid 'T' argument\n"); break; case 'h': usage(); exit(0); } } if ( T>N ) T = N; if ( !(S=S_make(N+1,T,2*N,2*T,apar,S_UVTABLE|S_STABLE|S_VERBOSE)) ) yaps_quit("S_make failed\n"); S_report(S, stdout); #if 1 /* * list various values */ printf("S(%d,%d) = %10.6lg, V=n/a U=%lg\n", N, 1, S_S(S,N,1), S_U(S,N,1)); for (t=2; t<=T; t++) printf("S(%d,%d) = %10.6lg, V=%lg U=%lg\n", N, t, S_S(S,N,t), S_V(S,N,t), S_U(S,N,t)); printf("\nS(%d,%d) = %10.6lg, V=n/a U=%lg\n", N+10, 1, S_S(S,N+10,1), S_U(S,N+10,1)); for (t=2; t<=2*T+10; t++) printf("S(%d,%d) = %10.6lg, V=%lg U=%lg\n", N+10, t, S_S(S,N+10,t), S_V(S,N+10,t), S_U(S,N+10,t)); for (n=T+1; n<=2*N; n++) printf("S(%d,%d..) = %10.6lg %10.6lg\n", n, T, S_S(S,n,T), S_S(S,n,T+1)); for (n=2; n<8; n++ ) { printf("S(%d,%d) = %10.6lg ", n, 1, S_S(S,n,1)); for (t=2; t<=n; t++) printf(" %10.6lg", S_S(S,n,t)); printf("\n"); } for (n=2; n<8; n++ ) { printf("V(%d,%d) = %10.6lg", n, 2, S_V(S,n,2)); for (t=3; t<=n; t++) printf(" %10.6lg", S_V(S,n,t)); printf("\n"); } for (n=2; n<8; n++ ) { printf("U(%d,%d) = %10.6lg", n, 2, S_U(S,n,2)); for (t=3; t<=n; t++) printf(" %10.6lg", S_U(S,n,t)); printf("\n"); } #else { /* * Sample a partition of size T of N by Chinese Rest. distribution * start by sampling the last entry from (1,2,...,N-T+1); * see p(m | CRD, apr, N, T) on page 4 of "doc/alpha.pdf" */ double *prob = malloc(sizeof(*prob)*N); double probtot = 1.0; prob[1] = 1.0; for (t=2; t<=N-T+1; t++) probtot += prob[t] = (N-t+1)*(t-apar) / S_U(S,N-t,T-1)/(t-1) * prob[t-1]; for (t=1; t<=N-T+1; t++) printf("p(m=%d) = %lg\n", t, prob[t]/probtot ); } #endif S_report(S, stdout); S_free(S); return 0; }
int main(int argc, char* argv[]) { int i, j, c, iter, ITER=200; unsigned long int seed=0; int bcycle = 0; float bstart = 0; int acycle = 0; float astart = 0; int burnin = 0; stable_t *ST = NULL; int useN = DIM*2; MAXN = 1; MAXT = MAXSTAB; /* * default values for args */ while ( (c=getopt(argc, argv,"a:b:B:C:I:hH:I:N:P:S:s:T:v"))>=0 ) { switch ( c ) { case 'h': usage(burnin?burnin:ITER/2, ITER, useN); exit(0); case 'b': if ( !optarg || sscanf(optarg,"%f,%f",&bpar, &bstart)<1 ) yaps_quit("Need a valid 'b' argument\n"); break; case 'a': if ( !optarg || sscanf(optarg,"%f,%f",&apar,&astart)<1 ) yaps_quit("Need a valid 'a' argument\n"); break; case 'H': if ( !optarg || sscanf(optarg,"%d",&bcycle)!=1 ) yaps_quit("Need a valid 'G' argument\n"); break; case 'T': if ( !optarg || sscanf(optarg,"%d",&MAXT)!=1 ) yaps_quit("Need a valid 'T' argument\n"); break; case 'I': if ( !optarg || sscanf(optarg,"%d",&acycle)!=1 ) yaps_quit("Need a valid 'H' argument\n"); break; case 'N': if ( !optarg || sscanf(optarg,"%d",&useN)!=1 ) yaps_quit("Need a valid 'N' argument\n"); break; case 'C': if ( !optarg || sscanf(optarg,"%d",&ITER)!=1 ) yaps_quit("Need a valid 'C' argument\n"); break; case 'B': if ( !optarg || sscanf(optarg,"%d",&burnin)!=1 ) yaps_quit("Need a valid 'B' argument\n"); break; case 's': if ( !optarg || sscanf(optarg,"%lu",&seed)!=1 ) yaps_quit("Need a valid 's' argument\n"); break; case 'v': verbose++; break; #ifdef S_USE_THREADS case 'P': if ( !optarg || sscanf(optarg,"%u",&threads)!=1 ) yaps_quit("Need a valid 'P' argument\n"); break; #endif default: yaps_message("Bad command line argument\n\n"); usage(burnin?burnin:ITER/2, ITER, useN); exit(0); } } if ( useN>=MAXDATA ) yaps_quit("N too large\n"); if ( burnin==0 ) burnin = ITER/2; else if ( burnin>=ITER-1 ) yaps_quit("Burnin %d too large for cycles %d\n", burnin, ITER); yaps_message("Configuration details\n"); yaps_message("=====================\n"); /* * set random number generator */ if ( seed ) { rng_seed(rng,seed); } else { rng_time(rng,&seed); } yaps_message("Setting seed for data = %lu\n", seed); if ( acycle && apar==0 ) apar = 0.5; yaps_message("Setting a=%f, b=%f, N=%d, D=%d\n", apar, bpar, useN, NUMMN); yaps_message(" burnin=%d,", burnin); yaps_message(" cycles=%d\n", ITER); /* * fix pointers */ for (j=0; j<NUMMN; j++) { n[j] = &n_data[j*DIM]; t[j] = &t_data[j*DIM]; tave[j] = &tave_data[j*DIM]; } /* * initialise everything */ for (j=0; j<NUMMN; j++) { N[j] = useN; T[j] = 0; Tave[j] = 0; for (i=0; i<DIM; i++) { n[j][i] = 0; t[j][i] = 0; tave[j][i] = 0; } } /* * fix base distribution, uniform */ { for (i=0; i<DIM; i++) { H[i] = 1.0/DIM; } } /* * create data using a CRP to get initialisation for n[] */ c = 0; for (j=0; j<NUMMN; j++) { int cc; i = sampleH(); data[c++] = i; // first entry always adds a table n[j][i]++; t[j][i]++; T[j]++; for (cc=1; cc<N[j]; cc++) { float val = (cc+bpar)*rng_unit(rng); val -= T[j]*apar+bpar; if ( val<=0 ) { // new table i = sampleH(); t[j][i]++; T[j]++; } else { for (i=0; i<DIM; i++) { val -= n[j][i] - t[j][i]*apar; if ( val<0 ) break; } } assert(i<DIM); n[j][i]++; data[c++] = i; } } binit = bpar; /* * record maximum entries in data * do this where possible so that one can get the table * sizes right * */ MAXN = n[0][0]+1; MAXT = 1; for (j=0; j<NUMMN; j++) { for (i=0; i<DIM; i++) { if ( MAXN<=n[j][i] ) MAXN = n[j][i]+1; if ( MAXT<t[j][i] ) MAXT = t[j][i]*1.1+1; } } if ( MAXT>MAXN ) MAXT = MAXN; yaps_message("Making S for N=%d M=%d a=%lf\n", MAXN,MAXT,apar); ST = S_make(MAXN, MAXT, MAXN, MAXTAB, apar, S_STABLE | S_UVTABLE); if ( ST==NULL ) yaps_quit("Making S failed!\n"); S_report(ST,stdout); /* * the seed only sets the data/sample, * the seed for the simulation/Gibbs is always random */ rng_free(rng); rng_time(rng,&seed); //yaps_message("Resetting seed = %lu\n", seed); /* * report on initial data statistics */ yaps_message("\nData sampled\n"); yaps_message("============\n"); for (j=0; j<NUMMN; j++) { yaps_message("n[%d] =", j); for (i=0; i<DIM; i++) yaps_message(" %d", n[j][i]); yaps_message(" = %d\n", N[j]); yaps_message("t[%d] =",j); for (i=0; i<DIM; i++) yaps_message(" %d", t[j][i]); yaps_message(" = %d\n", T[j]); } /* * set the hyperparameters used in Gibbs, * can be different to data */ if ( bstart==0 ) bstart = bpar; if ( astart==0 ) astart = apar; // initialise latent stats and reporting info for (j=0; j<NUMMN; j++) { T[j] = 0; Tave[j] = 0; } tcnt = 0; bave = 0; bcnt = 0; aave = 0; acnt = 0; bpar = bstart; if ( verbose && bcycle!=0 ) yaps_message("Starting with initial b=%f\n", bpar); apar = astart; if ( verbose && acycle!=0 ) yaps_message("Starting with initial a=%f\n", apar); for (j=0; j<NUMMN; j++) { for (i=0; i<DIM; i++) { tave[j][i] = 0; t[j][i] = 0; if ( n[j][i]>0 ) { /* * initialise to a single table */ t[j][i] = 1; T[j]++; } } } for ( iter=0; iter<ITER; iter++) { /* * sampling with table indicators */ c = 0; for (j=0; j<NUMMN; j++) { int cc; for (cc=0; cc<N[j]; cc++) { float one; i = data[c++]; assert(n[j][i]); if ( n[j][i]==1 ) // this indicator must always be 1, no sampling continue; // sample whether it contributes to a table if ( t[j][i]>1 && (n[j][i]-1)*rng_unit(rng)<(t[j][i]-1) ) { t[j][i]--; T[j]--; } assert(t[j][i]<n[j][i]); // sample new table indicator one = H[i] * (bpar + T[j]*apar) * (t[j][i]) / (n[j][i]-t[j][i]+1) * S_V(ST, n[j][i],t[j][i]+1); if ( rng_unit(rng) < one/(one+1.0) ) { t[j][i]++; T[j]++; } } } /* * one major cycle of Gibbs sampler finished */ if ( verbose>1 ) { for (j=0; j<NUMMN; j++) { for (i=0; i<DIM; i++) printf(" %d", t[j][i]); printf(" = %d\n", T[j]); } } /* * sample & record b */ if ( bcycle!=0 && iter%bcycle==0 ) { // Gibbs on bpar (concentration par) too if ( bcycle<0 ) { int bc = -bcycle; for (bc-- ; bc>0; bc--) bpar = sampleb(bpar, 1, PB_shape, PB_scale, N, T, apar, rng, 1, 1); } bpar = sampleb(bpar, 1, PB_shape, PB_scale, N, T, apar, rng, 1, 1); if ( iter>=burnin ) { bave += bpar; bcnt ++; } } /* * sample & record a */ if ( acycle!=0 && iter%acycle==0 ) { int dimI[NUMMN]; double dimb[NUMMN]; for (j=0; j<NUMMN; j++) { dimI[j] = DIM; dimb[j] = bpar; } // Gibbs on apar (discount par) too if ( acycle<0 ) { int bc = -acycle; for (bc-- ; bc>0; bc--) apar = samplea(apar, NUMMN, dimI, T, n, t, NULL, dimb, rng, 1, 1); } apar = samplea(apar, NUMMN, dimI, T, n, t, NULL, dimb, rng, 1, 1); if ( iter>=burnin ) { aave += apar; acnt ++; } if ( verbose>1 ) yaps_message("Extending S for a=%lf\n", apar); if ( S_remake(ST,apar) ) yaps_message("Extending S failed\n"); } /* * full statistics collection */ if ( iter>=burnin ) { for (j=0; j<NUMMN; j++) { for (i=0; i<DIM; i++) { tave[j][i] += t[j][i]; } Tave[j] += T[j]; } tcnt ++; } } /* * report for this experiment */ yaps_message("\nEstimates\n"); yaps_message("=========\n"); for (j=0; j<NUMMN; j++) { yaps_message("t[%d] = ", j); for (i=0; i<DIM; i++) yaps_message(" %.2f", tave[j][i]/tcnt); yaps_message("\nT[%d]=%.2f\n", j, Tave[j]/tcnt); } if ( bcycle!=0 && bcnt>0 ) yaps_message("\nb=%.2f", bave/bcnt); if ( acycle!=0 && acnt>0 ) yaps_message("\na=%.3f", aave/acnt); yaps_message("\n"); S_free(ST); rng_free(rng); return 0; }
/* * assumes s->usedN/M already set to new values and memory filled * startN/M = 0 ---> refill everything * startN/M > 0 ---> memory extended so refill from here, * i.e., these were *last* values set, start +1 */ static int S_remake_part(stable_t *sp, double a, unsigned startN, unsigned startM, unsigned usedN, unsigned usedM, unsigned usedN1) { int N, M; if ( startN==0 ) startM = 0; sp->a = a; sp->lga = lgamma(1.0-a); // yaps_message("S_remake_part(a=%lf,N=%u, M=%u)\n", a, startN, startM); /* * need to reset at sp->S1[] least to usedN1; * up to usedN used by sp->S[][], * and data needs overwriting up to usedN1 */ if ( startN==0 ) { sp->S1[0] = 0; N = 2; } else { N = startN+1; assert(sp->S1[startN-1]>0 ); } for ( ; N<=usedN; N++) sp->S1[N-1] = sp->S1[N-2] + log(N-1-a); if ( startN==0 ) // a has changed, so reset others for ( ; N<=usedN1; N++) sp->S1[N-1] = 0; if ( sp->flags&S_STABLE ) { if ( (sp->flags&S_FLOAT)==0 ) { if ( startM>0 && startM<usedM ) { /* * extend for M upto startN */ for (N=startM+1; N<=startN; N++) { for (M=startM+1; M<N && M<=usedM; M++) { sp->S[N-3][M-2] = logadd(log(N-M*a-1.0)+((M<N-1)?sp->S[N-4][M-2]:0), sp->S[N-4][M-3]); assert(isfinite(sp->S[N-3][M-2])); } } } /* * now fill from 0 after startN ... just like usual */ if ( startN==0 ) { sp->S[0][0] = logadd(sp->S1[1],log(2-2*a)); N = 4; } else { N = startN+1; assert(sp->S[N-4][0]>0); } for (; N<=usedN; N++) { sp->S[N-3][0] = logadd(log(N-2*a-1.0)+sp->S[N-4][0], sp->S1[N-2]); for (M=3; M<=usedM && M<N; M++) { sp->S[N-3][M-2] = logadd(log(N-M*a-1.0)+((M<N-1)?sp->S[N-4][M-2]:0), sp->S[N-4][M-3]); assert(isfinite(sp->S[N-3][M-2])); } } } else { /* * computation done in double by storing in sp->SfrontN+M[] */ if ( startM>0 && startM<usedM ) { /* * extend for M upto startN */ for (N=startM+2; N<=startN; N++) { double lastS; if (startM+1<N-1) lastS = sp->Sf[N-4][startM-1]; else lastS = 0 ; sp->Sf[N-3][startM-1] = sp->SfrontN[startM-1] = logadd(log(N-(startM+1)*a-1.0)+lastS, sp->SfrontM[N-startM-2]); for (M=startM+2; M<N && M<=usedM; M++) { double saveS = sp->SfrontN[M-2]; if ( M==N-1 ) saveS = 0; sp->SfrontN[M-2] = logadd(log(N-M*a-1.0)+saveS, lastS); sp->Sf[N-3][M-2] = sp->SfrontN[M-2]; assert(isfinite(sp->Sf[N-3][M-2])); lastS = saveS; } // save the SfrontM value if ( N>usedM ) sp->SfrontM[N-usedM-1] = sp->SfrontN[usedM-2]; } } if ( startN==0 ) { sp->Sf[0][0] = sp->SfrontN[0] = logadd(sp->S1[1],log(2-2*a)); N = 4; } else { N = startN+1; assert(sp->Sf[N-4][0]>0); } for ( ; N<=usedN; N++) { double lastS; lastS = sp->SfrontN[0]; sp->Sf[N-3][0] = sp->SfrontN[0] = logadd(log(N-2*a-1.0)+lastS, sp->S1[N-2]); for (M=3; M<=usedM && M<N; M++) { double saveS = sp->SfrontN[M-2]; if (M==N-1) saveS = 0; sp->SfrontN[M-2] = logadd(log(N-M*a-1.0)+saveS, lastS); sp->Sf[N-3][M-2] = sp->SfrontN[M-2]; #ifndef NDEBUG if ( !isfinite(sp->Sf[N-3][M-2]) ) yaps_quit("Building '%s' N to %d, sp->Sf[%d][%d] not finite, from %lf,%lf\n", sp->tag, usedN, N-3, M-2, saveS, lastS); #endif assert(isfinite(sp->Sf[N-3][M-2])); lastS = saveS; } // save the SfrontM value if ( N>usedM ) sp->SfrontM[N-usedM-1] = sp->SfrontN[usedM-2]; } } } if ( sp->flags&S_UVTABLE ) { if ( (sp->flags&S_FLOAT)==0 ) { if ( startM>0 && startM<usedM ) { /* * extend for M upto startN */ for (N=startM+1; N<=startN; N++) { for (M=startM+1; M<=N && M<=usedM; M++) { sp->V[N-2][M-2] = (1.0+((M<N)?((N-1-M*a)*sp->V[N-3][M-2]):0)) / (1.0/sp->V[N-3][M-3]+(N-1-(M-1)*a)); } } } /* * now fill from 0 after startN ... just like usual */ if ( startN==0 ) { sp->V[0][0] = 1.0/(1.0-a); N = 3; } else { N = startN+1; assert(sp->V[N-3][0]>0); } for (; N<=usedN; N++) { sp->V[N-2][0] = (1.0+(N-1-2*a)*sp->V[N-3][0])/(N-1-a); for (M=3; M<=usedM && M<=N; M++) { sp->V[N-2][M-2] = (1.0+((M<N)?((N-1-M*a)*sp->V[N-3][M-2]):0)) / (1.0/sp->V[N-3][M-3]+(N-1-(M-1)*a)); } } } else { if ( startM>0 && startM<usedM ) { /* * extend for M upto startN */ for (N=startM+1; N<=startN; N++) { double lastS; if ( startM+1<N) lastS = sp->VfrontN[startM-1]; else lastS = 0; sp->Vf[N-2][startM-1] = sp->VfrontN[startM-1] = (1.0+(N-1-(startM+1)*a)*lastS) / (1.0/sp->VfrontM[N-1-startM]+(N-1-(startM)*a)); for (M=startM+2; M<=N && M<=usedM; M++) { double saveS = sp->VfrontN[M-2]; assert(lastS!=0); sp->Vf[N-2][M-2] = sp->VfrontN[M-2] = (1.0+((M<N)?((N-1-M*a)*saveS):0)) / (1.0/lastS+(N-1-(M-1)*a)); lastS = saveS; } // save the VfrontM value if ( N>=usedM ) sp->VfrontM[N-usedM] = sp->VfrontN[usedM-2]; } } /* * now fill from 0 after startN ... just like usual */ if ( startN==0 ) { sp->Vf[0][0] = sp->VfrontN[0] = 1.0/(1.0-a); N = 3; } else { N = startN+1; assert(sp->Vf[N-3][0]>0); } for (; N<=usedN; N++) { double lastS; lastS = sp->VfrontN[0]; sp->Vf[N-2][0] = sp->VfrontN[0] = (1.0+(N-1-2*a)*lastS)/(N-1-a); for (M=3; M<=usedM && M<=N; M++) { double saveS = sp->VfrontN[M-2]; assert(lastS!=0); sp->Vf[N-2][M-2] = sp->VfrontN[M-2] = (1.0+((M<N)?((N-1-M*a)*saveS):0)) / (1.0/lastS+(N-1-(M-1)*a)); lastS = saveS; } // save the VfrontM value if ( N>=usedM ) sp->VfrontM[N-usedM] = sp->VfrontN[usedM-2]; } } } /* * change bounds at end only after data filled; * in case other threads running */ sp->usedN = usedN; sp->usedN1 = usedN1; sp->usedM = usedM; return 0; }