/*
 *  change in Dirichlet (alpha) log likelihood if topics k1 and k2
 *  were merged:  merged-count term minus the two separate-topic
 *  terms, summed over training documents.
 *  Fix: parameters were untyped K&R style (implicit int, invalid
 *  since C99); unused local 't' removed.
 */
double likemerge_DIRalpha(int k1, int k2) {
  /*
   *   Dirichlet for topics
   */
  int i;
  double likelihood = 0;
  for (i=0; i<ddN.DT; i++) {
    /*  only documents actually using k2 change under the merge  */
    if ( ddS.Ndt[i][k2]>0 ) {
      likelihood +=
        gammadiff((int)ddS.Ndt[i][k1]+ddS.Ndt[i][k2], ddP.alphapr[k1], 0.0)
        - gammadiff((int)ddS.Ndt[i][k1], ddP.alphapr[k1], 0.0)
        - gammadiff((int)ddS.Ndt[i][k2], ddP.alphapr[k2], 0.0);
    }
  }
  yap_infinite(likelihood);
  return likelihood;
}
// likelihood_DIRbeta() static double likemerge_DIRbeta(int k1, int k2) { int j; double val = 0; for (j=0; j<ddN.W; j++) { if ( ddS.Nwt[j][k2]>0 ) { assert(ddP.betapr[j]>0); val += (gammadiff((int)ddS.Nwt[j][k1]+ddS.Nwt[j][k2], ddP.betapr[j], 0.0) - gammadiff((int)ddS.Nwt[j][k1], ddP.betapr[j], 0.0) - gammadiff((int)ddS.Nwt[j][k2], ddP.betapr[j], 0.0)); } } val -= (gammadiff((int)ddS.NWt[k1]+ddS.NWt[k2], ddP.betatot, 0.0) - gammadiff((int)ddS.NWt[k1], ddP.betatot, 0.0) - gammadiff((int)ddS.NWt[k2], ddP.betatot, 0.0)); yap_infinite(val); return val; }
/*
 *  change in Dirichlet (beta) log likelihood if topics k1 and k2
 *  were merged.
 *  Fix: parameters were untyped K&R style (implicit int, invalid
 *  since C99); unused local 't' and the redundant second accumulator
 *  removed.
 *  NOTE(review): a static definition with the same name also appears
 *  earlier in this file -- confirm which one is meant to be compiled.
 */
double likemerge_DIRbeta(int k1, int k2) {
  int j;
  double likelihood = 0;
  for (j=0; j<ddN.W; j++) {
    if ( ddS.Nwt[j][k2]>0 ) {
      assert(ddP.betapr[j]>0);
      likelihood +=
        gammadiff((int)ddS.Nwt[j][k1]+ddS.Nwt[j][k2], ddP.betapr[j], 0.0)
        - gammadiff((int)ddS.Nwt[j][k1], ddP.betapr[j], 0.0)
        - gammadiff((int)ddS.Nwt[j][k2], ddP.betapr[j], 0.0);
    }
  }
  /*  normalisation terms over the topic totals  */
  likelihood -= gammadiff((int)ddS.NWt[k1]+ddS.NWt[k2], ddP.betatot, 0.0)
    - gammadiff((int)ddS.NWt[k1], ddP.betatot, 0.0)
    - gammadiff((int)ddS.NWt[k2], ddP.betatot, 0.0);
  yap_infinite(likelihood);
  return likelihood;
}
/*
 *  log likelihood contribution of the Pitman-Yor word side (beta),
 *  including the gamma prior on the concentration ddP.bwpar.
 *  NOTE(review): law=log(ddP.awpar) is -inf when awpar==0, but it is
 *  only read in the awpar!=0 branch below, so that is harmless.
 */
double likelihood_PYbeta() {
  int i,t;
  double likelihood = 0;
  double lbw = log(ddP.bwpar);
  double law = log(ddP.awpar);
  /*  gamma prior on the concentration parameter  */
  likelihood += pctl_gammaprior(ddP.bwpar);
  /*
   *    term for k-th node
   */
#ifdef BWPAR0
  for (t=1; t<ddN.T; t++) {
#else
  for (t=0; t<ddN.T; t++) {
#endif
    uint32_t Tw_ = 0;    /*  accumulates ddS.Twt[i][t] over words i  */
    for (i=0; i<ddN.W; i++) {
      int tt = ddS.Twt[i][t];
      int nn = ddS.Nwt[i][t];
      if ( nn>0 ) {
        Tw_ += tt;
        likelihood += S_S(ddC.SY,nn,tt);
#if 1
        /*  debugging guard:  abort with diagnostics on non-finite value  */
        if ( !finite(likelihood) || isinf(likelihood) || isnan(likelihood) )
          yap_quit("Like=%lf: Nwt[%d][%d]=%d Twt[i][t]=%d S.M=%d S.N=%d\n",
                   likelihood, i, t,
                   (int)ddS.Nwt[i][t],(int)ddS.Twt[i][t],
                   ddC.SY->usedM, ddC.SY->usedN);
#endif
      }
    }
    yap_infinite(likelihood);
    if ( ddP.awpar==0 ) {
      /*  zero-discount case uses only log(bwpar)  */
      likelihood += Tw_*lbw;
    } else {
#ifdef L_CACHE
      /*  cached variant of the gammadiff() below  */
      likelihood += Tw_*law + gcache_value(&ddC.lgbaw, (int)Tw_);
#else
      likelihood += Tw_*law + gammadiff((int)Tw_, ddP.bwpar/ddP.awpar, 0.0);
#endif
    }
#ifdef L_CACHE
    likelihood -= gcache_value(&ddC.lgbw, (int)ddS.NWt[t]);
#else
    likelihood -= gammadiff((int)ddS.NWt[t], ddP.bwpar, 0.0);
#endif
    yap_infinite(likelihood);
  }
  yap_infinite(likelihood);
  return likelihood;
}

/*
 *  extra term for the constant base-distribution (PDP) case:
 *  each of the ddS.TwT[j] tables for word j contributes
 *  log(ddP.betapr[j]).
 */
double likelihood_PYbeta_PDP() {
  /*
   *   the constant prior
   */
  int j;
  double likelihood = 0;
  for (j=0; j<ddN.W; j++) {
    if ( ddS.TwT[j]>0 ) {
      likelihood += ddS.TwT[j]*log(ddP.betapr[j]);
    }
  }
  //yap_infinite(likelihood);
  return likelihood;
}
/********************************
 *    code for LDA
 *****************************/
/*
 *  one Gibbs pass over the words of document 'did':  for each word,
 *  remove its current topic from the statistics, compute topic
 *  probabilities p[], sample a new topic and re-install it.
 *  Returns the accumulated log probability of the (scored) words.
 *  Hold-out words (fix_doc==GibbsHold) are scored but not resampled.
 */
double gibbs_lda(/*
                  *  fix==GibbsNone for standard ML training/testing
                  *  fix==GibbsHold for word hold-out testing,
                  *       same as GibbsNone but also handles
                  *       train and test words differently
                  */
                 enum GibbsType fix,
                 int did,    //  document index
                 int words,  //  do this many
                 float *p,   //  temp store
                 D_MiSi_t *dD
                 ) {
  int i, wid, t, mi=0;
  int e;
  double Z, tot;
  double logdoc = 0;
  int logdocinf = 0;         /*  report a non-finite logdoc only once  */
  int StartWord = ddD.N_dTcum[did];
  int EndWord = StartWord + words;
  float dtip[ddN.T];
#ifdef MH_STEP
  /*  doc-side factors are cached once per document for the proposal  */
  double doc_side_cache[ddN.T];
  for (t=0; t<ddN.T; t++)
    doc_side_cache[t] = doc_side_fact(did,t);
#endif
  /*
   *   some of the latent variables are not sampled
   *   are kept in the testing version, uses enum GibbsType
   *    fix = global document setting
   *    fix_doc = settings for word in this doc
   *
   *   NB.   if fix==GibbsNone, then fix_doc==fix
   *         if fix==GibbsHold then fix_doc==GibbsHold or GibbsNone
   */
  enum GibbsType fix_doc = fix;

  if ( PCTL_BURSTY() ) {
    mi = ddM.MI[did];       /*  multi-occurrence index for burstiness  */
  }
  e = ddD.e[did];

  for (i=StartWord; i<EndWord; i++) {
#ifdef MH_STEP
    int oldt;
#endif
    if ( fix==GibbsHold ) {
      if ( pctl_hold(i) )
        fix_doc = GibbsHold;  //  this word is a hold out
      else
        fix_doc = GibbsNone;
    }
    // check_m_vte(e);
    wid=ddD.w[i];
    /*******************
     *   first we remove affects of this word on the stats
     *******************/
#ifdef MH_STEP
    oldt =
#endif
      t = Z_t(ddS.z[i]);
    if ( fix_doc!=GibbsHold ) {
      /*  remove_topic() returning non-zero skips this word entirely  */
      if ( remove_topic(i, did, (!PCTL_BURSTY()||Z_issetr(ddS.z[i]))?wid:-1,
                        t, mi, dD) ) {
        goto endword;
      }
    }
    /***********************
     *    get topic probabilities
     ***********************/
    // check_m_vte(e);
#ifdef MU_CACHE
    mu_side_fact_update(e);
#endif
#ifdef PHI_CACHE
    phi_norm_update(wid, e);
    phi_sum_update(wid, e, i);
#endif
    for (t=0, Z=0, tot=0; t<ddN.T; t++) {
#ifdef MH_STEP
      /*  temporarily disable 'back' while building the proposal  */
      int saveback = ddP.back;
      if ( fix_doc!=GibbsHold )
        ddP.back = 0;
#endif
      /*
       *   (fix_doc==GibbsHold) =>
       *       doing estimation, not sampling so use prob versions
       *    else
       *        doing sampling so use fact versions
       */
#ifdef MH_STEP
      double tf = (fix_doc==GibbsHold)?doc_side_prob(did,t):
        doc_side_cache[t];
      if ( tf>0 ) {
        double wf = (fix_doc==GibbsHold)?word_side_prob(e, wid, t):
          word_side_fact(e, wid, t);
#else
      double tf = (fix_doc==GibbsHold)?doc_side_prob(did,t):
        doc_side_fact(did,t);
      if ( tf>0 ) {
        double wf = (fix_doc==GibbsHold)?word_side_prob(e, wid, t):
          word_side_fact(e, wid, t);
#endif
        tot += tf;          /*  normaliser for the per-word probability  */
        if ( PCTL_BURSTY() )
          wf = (fix_doc==GibbsHold)?docprob(dD, t, i, mi, wf):
            docfact(dD, t, i, mi, wf, &dtip[t]);
        Z += p[t] = tf * wf;
      } else
        p[t] = 0;
#ifdef MH_STEP
      ddP.back = saveback;
#endif
    }
    if ( fix!=GibbsHold || fix_doc==GibbsHold )
      logdoc += log(Z/tot);
    if ( logdocinf==0 )
      if ( !finite(logdoc) ) {
        logdocinf++;
        yap_infinite(logdoc);
      }
    /*******************
     *   now sample t using p[] and install affects of this on the stats;
     *   but note this needs indicator to be set!
     *******************/
    if ( fix_doc!=GibbsHold ) {
      /*
       *  sample and update core stats
       */
      t = samplet(p, Z, ddN.T, rng_unit(rngp));
#ifdef MH_STEP
      /*  accept/reject the proposed move using the exact factors  */
      if ( t != oldt ) {
        double ratio = p[oldt]/p[t];
        if ( PCTL_BURSTY() ) {
          ratio *= docfact(dD, t, i, mi, word_side_fact(e, wid, t), &dtip[t])
            * doc_side_fact(did,t);
          ratio /= docfact(dD, oldt, i, mi, word_side_fact(e, wid, oldt),
                           &dtip[oldt])
            * doc_side_fact(did,oldt);
        } else {
          ratio *= word_side_fact(e, wid, t) * doc_side_fact(did, t);
          ratio /= word_side_fact(e, wid, oldt) * doc_side_fact(did, oldt);
        }
        if ( ratio<1 && ratio<rng_unit(rngp) )
          t = oldt;         /*  reject: keep the old topic  */
      }
#endif
      Z_sett(ddS.z[i],t);
#ifdef TRACE_WT
      if ( wid==TR_W && t==TR_T )
        yap_message("update_topic(w=%d,t=%d,d=%d,l=%d,z=%d,N=%d,T=%d)\n",
                    wid,t,did,i,ddS.z[i],
                    (int)ddS.m_vte[wid][t][e],(int)ddS.s_vte[wid][t][e]);
#endif
      update_topic(i, did, wid, t, mi, dtip[t], dD);
#ifdef TRACE_WT
      if ( wid==TR_W && t==TR_T )
        yap_message("after update_topic(w=%d,t=%d,d=%d,l=%d,z=%d,N=%d,T=%d)\n",
                    wid,t,did,i,ddS.z[i],
                    (int)ddS.m_vte[wid][t][e],(int)ddS.s_vte[wid][t][e]);
#endif
    }
  endword:
    if ( PCTL_BURSTY() && M_multi(i) ) {
      mi++;                 /*  advance multi-occurrence index  */
    }
  }
  return logdoc;
}
/*
 *    compute likelihood ratio difference based on *M
 */
/*
 *  NOTE(review): this definition appears truncated in this chunk --
 *  the function body is never closed and the #endif below has no
 *  matching #if in view, so the conditional wrapping this fragment
 *  lies outside the visible source.  Left byte-identical.
 */
static double merge_like_Tdt_sum(int k1, int k2, merge_alpha_t *M) {
  double *val;
  int d;
  val = dvec(ddN.DT);
  for (d=0; d<ddN.DT; d++) {
  }
#endif

/*
 *  change in document-side (alpha) log likelihood if topics k1 and k2
 *  were merged:  merged statistics come from *M, current per-topic
 *  statistics from ddS.  Handles the H_PDP, H_HDP and remaining
 *  (PY) hierarchies for the topic-level term.
 */
static double merge_like_Tdt(int k1, int k2, merge_alpha_t *M) {
  int d;
  double la = 0;
  double lb = log(ddP.bpar);
  int TD_diff = 0;          /*  net change in total table count  */
  double likelihood = 0;
  if ( ddP.apar>0 )
    la = log(ddP.apar);
  for (d=0; d<ddN.DT; d++) {
    int Td_diff;    /*  total change in T for doc  */
    if ( M->Ndt[d]>1 ) {
      /*  swap the two separate Stirling terms for the merged one  */
      likelihood -= S_S(ddC.SX,ddS.Ndt[d][k2],ddS.Tdt[d][k2]);
      likelihood -= S_S(ddC.SX,ddS.Ndt[d][k1],ddS.Tdt[d][k1]);
      likelihood += S_S(ddC.SX,M->Ndt[d],M->Tdt[d]);
      assert(M->Tdt[d]>=1);
      assert(M->Tdt[d]<=M->Ndt[d]);
      assert(ddS.Ndt[d][k2]==0 || ddS.Tdt[d][k2]>0);
      assert(ddS.Ndt[d][k1]==0 || ddS.Tdt[d][k1]>0);
    }
    yap_infinite(likelihood);
    TD_diff += Td_diff = (M->Tdt[d]-ddS.Tdt[d][k2]-ddS.Tdt[d][k1]);
    if ( Td_diff==0 )
      continue;
    if ( ddP.apar==0 ) {
      likelihood += Td_diff*lb;
    } else {
      likelihood += Td_diff*la;
      /*  direction of the table-count change picks the gammadiff form  */
      if ( Td_diff<0 )
        likelihood -= gammadiff(-Td_diff,M->TdT[d]+ddP.bpar/ddP.apar, 0.0);
      else
        likelihood += gammadiff(Td_diff, M->TdT[d]-Td_diff+ddP.bpar/ddP.apar,
                                0.0);
    }
    yap_infinite(likelihood);
  }
  if ( ddP.PYalpha==H_PDP ) {
    likelihood += (M->TDt-ddS.TDt[k1])*log(ddP.alphapr[k1])
      - ddS.TDt[k2]*log(ddP.alphapr[k2]);
  } else if ( ddP.PYalpha==H_HDP ) {
    likelihood += lgamma(M->TDTm+M->TDt-TD_diff+ddP.b0)
      - lgamma(M->TDTm+M->TDt+ddP.b0);
    likelihood -= gammadiff(ddS.TDt[k1], ddP.b0*ddP.alphapr[k1], 0.0);
    likelihood -= gammadiff(ddS.TDt[k2], ddP.b0*ddP.alphapr[k2], 0.0);
    likelihood += gammadiff(M->TDt, ddP.b0*ddP.alphapr[k1], 0.0);
  } else {
    double lga0 = lgamma(1-ddP.a0);
    likelihood += lgamma(M->TDTm+M->TDt-TD_diff+ddP.b0)
      - lgamma(M->TDTm+M->TDt+ddP.b0);
    /*  because k2 gone to zero, so one less topic  */
    likelihood -= log(ddP.b0+ddP.a0*(ddS.TDTnz-1));
    if ( ddS.TDt[k2]>1 )
      likelihood -= lgamma(ddS.TDt[k2]-ddP.a0) - lga0;
    if ( ddS.TDt[k1]>1 )
      likelihood -= lgamma(ddS.TDt[k1]-ddP.a0) - lga0;
    likelihood += lgamma(M->TDt-ddP.a0) - lga0;
  }
  yap_infinite(likelihood);
  return likelihood;
}
/*
 *    compute likelihood ratio difference based on *M
 */
/*
 *  change in word-side (beta) log likelihood if topics k1 and k2
 *  were merged;  mirrors merge_like_Tdt() on the document side.
 *  Fixes:  the two subtraction Stirling terms were the identical
 *  mismatched S_S(.,Nwt[i][k1],Twt[i][k2]) repeated twice -- corrected
 *  to the (k1,k1)/(k2,k2) pairing used by merge_like_Tdt();  typo in
 *  the BWPAR0 quit message;  missing parenthesisation in the BWPAR0
 *  branch (made to match the H_PDP case of merge_like_Tdt()).
 */
static double merge_like_Twt(int k1, int k2, merge_beta_t *M) {
  int i, w;
  double likelihood = 0;
#ifndef BWPAR0
  double lbw = log(ddP.bwpar);
#endif
  double law = log(ddP.awpar);
  /*  NOTE(review): TW_diff is never accumulated in this function, so
   *  the lgamma() terms below always see zero;  the H_HDP branch that
   *  uses it is unreachable anyway (yap_quit above it) -- confirm  */
  int TW_diff = 0;
#ifdef BWPAR0
  yap_quit("BWPAR0 unimplemented in merge\n");
#endif
  for (i=0; i<ddN.W; i++) {
    /*  replace the two separate Stirling terms by the merged one  */
    likelihood -= S_S(ddC.SY,ddS.Nwt[i][k1],ddS.Twt[i][k1]);
    likelihood -= S_S(ddC.SY,ddS.Nwt[i][k2],ddS.Twt[i][k2]);
    likelihood += S_S(ddC.SY,M->Nwt[i],M->Twt[i]);
  }
  if ( ddP.awpar==0 ) {
#ifdef BWPAR0
    likelihood += (M->TWt-ddS.TWt[k1])*log(ddP_bwpar(k1))
      -ddS.TWt[k2]*log(ddP_bwpar(k2));
#else
    likelihood += (M->TWt-ddS.TWt[k1]-ddS.TWt[k2])*lbw;
#endif
  } else {
    likelihood += (M->TWt-ddS.TWt[k1]-ddS.TWt[k2])*law
      + gammadiff((int)M->TWt, ddP.bwpar/ddP.awpar, 0.0)
      - gammadiff((int)ddS.TWt[k1], ddP_bwpar(k1)/ddP.awpar, 0.0)
      - gammadiff((int)ddS.TWt[k2], ddP_bwpar(k2)/ddP.awpar, 0.0);
  }
  likelihood += gammadiff((int)ddS.NWt[k1], ddP_bwpar(k1), 0.0);
  likelihood += gammadiff((int)ddS.NWt[k2], ddP_bwpar(k2), 0.0);
  likelihood -= gammadiff((int)M->NWt, ddP.bwpar, 0.0);
  yap_infinite(likelihood);
  if ( ddP.PYbeta==H_PDP ) {
    for (w=0; w<ddN.W; w++) {
      if ( ddS.TwT[w]>0 ) {
        // ???????????????
        likelihood += ddS.TwT[w]*log(ddP.betapr[w]);
      }
    }
  } else if ( ddP.PYbeta==H_HDP ) {
    yap_quit("merge with PYbeta unimplemented\n");
    likelihood += lgamma(M->TWTm+M->TWt-TW_diff+ddP.bw0)
      - lgamma(M->TWTm+M->TWt+ddP.bw0);
    for (w=0; w<ddN.W; w++) {
      // ???????????
      /*  NOTE(review): betapr[] is indexed by topic here but by word
       *  elsewhere;  dead code (yap_quit above) -- confirm before
       *  enabling this branch  */
      likelihood -= gammadiff(ddS.TWt[k1], ddP.bw0*ddP.betapr[k1], 0.0);
      likelihood -= gammadiff(ddS.TWt[k2], ddP.bw0*ddP.betapr[k2], 0.0);
      likelihood += gammadiff(M->TWt, ddP.bw0*ddP.betapr[k1], 0.0);
    }
  } else {
    double lgaw0 = lgamma(1-ddP.aw0);
    likelihood += lgamma(M->TWTm+M->TWt-TW_diff+ddP.bw0)
      - lgamma(M->TWTm+M->TWt+ddP.bw0);
    /*  because k2 gone to zero, so one less topic  */
    likelihood -= log(ddP.bw0+ddP.aw0*(ddS.TWTnz-1));
    if ( ddS.TWt[k2]>1 )
      likelihood -= lgamma(ddS.TWt[k2]-ddP.aw0) - lgaw0;
    if ( ddS.TWt[k1]>1 )
      likelihood -= lgamma(ddS.TWt[k1]-ddP.aw0) - lgaw0;
    likelihood += lgamma(M->TWt-ddP.aw0) - lgaw0;
  }
  yap_infinite(likelihood);
  return likelihood;
}