/*
 *  Probability of topic t in document d; only used in estimation.
 *
 *    d    - document index
 *    t    - topic index
 *    Ttot - table total, enters through the discount term apar*Ttot
 *
 *  Returns, in order of precedence:
 *    - ddP.theta[d][t] when a theta matrix is present;
 *    - the smoothed count ratio using alphapr[] when PYalpha==H_None;
 *    - the HPDD new-topic share when the topic has no tables (TDt[t]==0);
 *    - otherwise the PYP form built on alphabasetopicprob(t).
 */
double topicprob(int d, int t, int Ttot) {
  double docmass;     /*  denominator shared by all the PYP cases  */
  double parentmass;  /*  mass routed through the parent base rate  */

  if ( ddP.theta )
    return ddP.theta[d][t];

  if ( ddP.PYalpha==H_None )
    return ((double)ddS.Ndt[d][t]+ddP.alphapr[t])
      / ((double)ddS.NdT[d]+ddP.alphatot);

  docmass = (double)ddP.bpar+ddS.NdT[d];

  if ( ddP.PYalpha==H_HPDD && ddS.TDt[t]==0 ) {
    /*
     *  HPDD and the topic currently has zero occupancy:
     *  this covers a new topic being introduced into a document,
     *  so the probability is spread evenly over all the empty topics
     *  (hence the final division by T-TDTnz)
     */
    const double newtable = (ddP.bpar+ddP.apar*Ttot)/docmass;
    return newtable
      * (ddP.b0+ddP.a0*ddS.TDTnz)/(ddP.b0+ddS.TDT)
      / (ddN.T-ddS.TDTnz);
  }

  /*
   *  standard PYP result using the parent base rate
   */
  parentmass = ((double)ddP.bpar+ddP.apar*Ttot) * alphabasetopicprob(t);
  if ( ddS.Tdt[d][t]==0 ) {
    /*  no tables for this topic in the doc, only the parent term  */
    return parentmass / docmass;
  }
  return (ddS.Ndt[d][t] - ddP.apar*ddS.Tdt[d][t] + parentmass) / docmass;
}
/*
 *  For topic k, compute a per-document term and return the total
 *  over all ddN.DT documents.
 *
 *  For each document d with more than one word in topic k
 *  (Ndt[d][k]>1) two candidate values are formed:
 *    xp - built from S_V(SX,nn,tt+1) and the current base rate,
 *         only when another table could be added (tt<nn);
 *    xm - the reciprocal form built from S_V(SX,nn,tt), only when
 *         a table could be removed (tt>1);
 *  and the larger of the two is that document's term.  Documents
 *  with Ndt[d][k]<=1 contribute zero.
 *
 *  NOTE(review): the original routine was left unfinished -- it
 *  filled a per-document array and then fell off the end without
 *  returning anything, which is undefined behaviour for a function
 *  returning double whose value is used.  Returning the sum of the
 *  per-document terms is presumed from the routine's name; confirm
 *  against the intended use before relying on it.
 */
static double merge_sumapprox_Tdt(int k) {
  double sum = 0;
  int d;
  for (d=0; d<ddN.DT; d++) {
    double xp = 0, xm = 0;
    int nn, tt, TdT;
    nn = ddS.Ndt[d][k];
    if ( nn<=1 ) {
      /*  contributes zero  */
      continue;
    }
    tt = ddS.Tdt[d][k];
    TdT = getTdT(d);
    if ( tt<nn )
      xp = (ddP.bpar + ddP.apar*TdT)
        * S_V(ddC.SX,nn,tt+1) * alphabasetopicprob(k);
    if ( tt>1 ) {
      /*
       *  evaluate the base rate with one table for topic k taken
       *  out of the global totals, then put the totals back;
       *  presumably alphabasetopicprob() reads these counts
       */
      ddS.TDt[k]--;
      ddS.TDT--;
      xm = 1.0/(ddP.bpar + ddP.apar*(TdT-1))
        / S_V(ddC.SX,nn,tt) / alphabasetopicprob(k);
      ddS.TDt[k]++;
      ddS.TDT++;
    }
    /*
     *  accumulate directly, in document order; no per-document
     *  array is needed so stack use no longer grows with ddN.DT
     */
    sum += ( xm>xp ) ? xm : xp;
  }
  return sum;
}
/*
 *  Weights for whether the doc table indicator is increased,
 *  but not forced.
 *
 *    d, t   - document and topic indices
 *    Ttot   - table total, enters through apar*Ttot
 *    *uone  - receives the weight for the indicator being one
 *    *uzero - receives the weight for the indicator being zero
 *
 *  The two outputs are not normalised against each other here.
 */
void doctableindicatorprob(int d, int t, int Ttot,
                           double *uone, double *uzero) {
  const int ndt = ddS.Ndt[d][t];
  const int tdt = ddS.Tdt[d][t];
  /*  factor for adding a table  */
  const double grow = S_UV(ddC.SX,ndt,tdt+1);
  /*  factor for keeping the table count; closed form when tdt==1  */
  const double stay = ( tdt==1 )
    ? (ndt - ddP.apar)
    : S_U(ddC.SX,ndt,tdt);

  *uone = grow * (ddP.bpar+ddP.apar*Ttot) * alphabasetopicprob(t)
    * (tdt+1)/(ndt+1);
  *uzero = stay * (ndt-tdt+1)/(ndt+1);
}
/*
 *  Unnormalised probability of topic given document.
 *
 *    d, t   - document and topic indices
 *    *zerod - in/out flag for the HPDD case of a topic with no tables:
 *             if nonzero on entry the new-topic mass is returned and
 *             the flag is cleared; if already zero, 0 is returned
 *    *tip   - set to prob. indicator would be 1
 *             (left untouched when no PY prior is in use)
 *    Ttot   - total tables
 */
double topicfact(int d, int t, int Ttot, uint16_t *zerod, float *tip) {
  double uone, uzero;
  double fact;

  if ( !ddP.PYalpha ) {
    /*  no PY prior on alpha, plain smoothed count  */
    return ((double)ddS.Ndt[d][t]+ddP.alphapr[t]);
  }

  if ( ddP.PYalpha==H_HPDD && ddS.TDt[t]==0 ) {
    /*
     *  HPDD and the topic has zero occupancy:
     *  handles the introduction of a new topic into a document;
     *  only the first such topic gets the mass, later ones get zero
     */
    fact = 0;
    if ( *zerod ) {
      fact = (ddP.bpar+ddP.apar*Ttot)
        * (ddP.b0+ddP.a0*ddS.TDTnz)/(ddP.b0+ddS.TDT);
      *zerod = 0;
    }
    *tip = 1.0;
    return fact;
  }

  if ( ddS.Tdt[d][t]==0 ) {
    /*  no tables for this topic in the doc, so the count must be zero  */
#ifndef NDEBUG
    if ( ddS.Ndt[d][t]>0 ) {
      check_Ndt(d);
      assert(ddS.Ndt[d][t]==0);
    }
#endif
    fact = ((double)ddP.bpar+ddP.apar*Ttot) * alphabasetopicprob(t);
    *tip = 1.0;
    return fact;
  }

  /*  topic already has tables in the doc, so the count must be positive  */
#ifndef NDEBUG
  if ( ddS.Ndt[d][t]==0 ) {
    check_Ndt(d);
    assert(ddS.Ndt[d][t]>0);
  }
#endif
  doctableindicatorprob(d, t, Ttot, &uone, &uzero);
  fact = uone + uzero;
  *tip = uone/(uone + uzero);
  return fact;
}
/*
 *  Fill vp[0..ddN.T-1] with the normalised base topic probabilities.
 *
 *  Without a PY prior on alpha the work is handed to get_probs_alpha().
 *  Otherwise each entry is alphabasetopicprob(t), except that under
 *  HPDD only the first topic with no tables keeps its value and any
 *  later empty topics are set to zero.  The entries are then divided
 *  by their total; in debug builds a message is issued if that total
 *  is not close to one.
 */
void get_probs(double *vp) {
  double total = 0;
  int first_empty_pending = 1;
  int t;

  if ( ddP.PYalpha==0 ) {
    get_probs_alpha(vp);
    return;
  }

  for (t=0; t<ddN.T; t++) {
    const int hpdd = ( ddP.PYalpha==H_HPDD );
    if ( !hpdd || ddS.TDt[t]>0 || first_empty_pending ) {
      vp[t] = alphabasetopicprob(t);
      total += vp[t];
    } else {
      vp[t] = 0;
    }
    /*  once one empty topic has been seen, later ones are zeroed  */
    if ( hpdd && ddS.TDt[t]==0 )
      first_empty_pending = 0;
  }

#ifndef NDEBUG
  if ( fabs(total-1.0)>1e-4 ) {
    yap_message("get_probs() probs doesn't normalise, get %lf\n", total);
  }
#endif

  for (t=0; t<ddN.T; t++) {
    vp[t] /= total;
  }
}