double MvtNuTF::Loglike(const Vector &Nu, Vector &g, uint nd) const {
  const std::vector<Ptr<VectorData>> &dat(mod->dat());
  double ldsi = mod->ldsi();
  const SpdMatrix &Siginv(mod->siginv());
  const Vector &mu(mod->mu());
  const double logpi = 1.1447298858494;  // log(pi)
  double nu = Nu[0];
  double lognu = log(nu);
  uint n = dat.size();
  uint d = mu.size();
  double half_npd = .5 * (nu + d);

  // Terms that are constant across observations.
  double ans = lgamma(half_npd) - lgamma(nu / 2) - .5 * d * (lognu + logpi);
  ans += .5 * ldsi + half_npd * lognu;
  ans *= n;
  if (nd > 0) {
    g[0] = .5 * (digamma(half_npd) - digamma(nu / 2.0) - d / nu);
    g[0] += half_npd / nu + .5 * lognu;
    g[0] *= n;
  }

  // Per-observation Mahalanobis terms.
  for (uint i = 0; i < n; ++i) {
    double delta = Siginv.Mdist(mu, dat[i]->value());
    double npd = nu + delta;
    ans -= half_npd * log(npd);
    if (nd > 0) {
      g[0] -= half_npd / npd + .5 * log(npd);
    }
  }
  return ans;
}
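For reference, the density this log likelihood appears to evaluate is the multivariate Student-t; a sketch of the correspondence, with $\delta_i$ matching `Siginv.Mdist(mu, dat[i]->value())` and $\log|\Sigma^{-1}|$ matching `ldsi`:

$$\log f(x_i) = \log\Gamma\!\left(\tfrac{\nu+d}{2}\right) - \log\Gamma\!\left(\tfrac{\nu}{2}\right) - \tfrac{d}{2}(\log\nu + \log\pi) + \tfrac12\log|\Sigma^{-1}| - \tfrac{\nu+d}{2}\left[\log(\nu+\delta_i) - \log\nu\right], \qquad \delta_i = (x_i-\mu)^\top\Sigma^{-1}(x_i-\mu).$$

Summing over the $n$ observations gives the constant part (scaled by `n`) plus the per-observation loop.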
void set_rule_weights(grammar g, FLOAT *rule_counts, int VariationalBayes) {
  FLOAT *parent_sum = MALLOC((g->nnts+1)*sizeof(FLOAT));
  size_t i;
  for (i = 0; i <= g->nnts; i++)
    parent_sum[i] = 0.0;
  /* accumulate expected counts by parent nonterminal */
  for (i = 0; i < g->nrules; i++) {
    assert(g->rules[i]->e[0] <= g->nnts);
    assert(rule_counts[i] >= 0.0);
    parent_sum[g->rules[i]->e[0]] += rule_counts[i];
  }
  for (i = 0; i < g->nrules; i++) {
    if (rule_counts[i] > 0.0) {
      if (VariationalBayes)
        g->weights[i] = exp(digamma(rule_counts[i])
                            - digamma(parent_sum[g->rules[i]->e[0]]));
      else
        g->weights[i] = rule_counts[i]/parent_sum[g->rules[i]->e[0]];
    }
    else
      g->weights[i] = 0.0;
  }
  FREE(parent_sum);
}
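The VariationalBayes branch follows the usual mean-field treatment of a Dirichlet over the rules sharing a parent: if $c_r$ is the expected count of rule $r$, then

$$\mathbb{E}_q[\log\theta_r] = \psi(c_r) - \psi\Big(\textstyle\sum_{r'\,:\,\mathrm{parent}(r')=\mathrm{parent}(r)} c_{r'}\Big),$$

and the stored weight is $\exp(\mathbb{E}_q[\log\theta_r])$, which discounts small counts relative to the plain ratio used in the EM branch.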
double var_bayes::compute_likelihood(const document &doc,
                                     const std::vector<double> &var_gamma,
                                     const std::vector<std::vector<double>> &phi) {
    double likelihood = 0;
    double var_gamma_sum = 0;
    double alpha_sum = 0;
    std::vector<double> dig(numTopics);

    for (int k = 0; k < numTopics; k++) {
        dig[k] = digamma(var_gamma[k]);
        var_gamma_sum += var_gamma[k];
        alpha_sum += alpha.alpha[k];
    }
    double digsum = digamma(var_gamma_sum);

    // Normalizers of the Dirichlet prior and of the variational posterior.
    // These appear once in the bound, not once per topic.
    likelihood = lgamma(alpha_sum) - lgamma(var_gamma_sum);

    for (int k = 0; k < numTopics; k++) {
        likelihood -= lgamma(alpha.alpha[k]);
        likelihood += (alpha.alpha[k] - 1)*(dig[k] - digsum);
        likelihood += lgamma(var_gamma[k]) - (var_gamma[k] - 1)*(dig[k] - digsum);
        int n = 0;
        for (auto const& word_count : doc.wordCounts) {
            if (phi[n][k] > 0) {
                likelihood += word_count.second
                    * (phi[n][k] * ((dig[k] - digsum)
                                    - log(phi[n][k])
                                    + logProbW[k][word_count.first]));
            }
            n++;
        }
    }
    return likelihood;
}
// First order derivative of the Student-t distribution
// with respect to the degree of freedom:
double ddf_pt(double x, double df) {
    double epsabs=0.0, epsrel=0.0, integ=0.0, tinteg=0.0;
    double abserr=0.0, origin=0.0, q=0.0, res=0.0, *work;
    int inf=0, neval=0, ier=0, limit=0, lenw=0;
    int last=0, *iwork;

    // Initialize the parameters and variables for the integration routine:
    inf=-1;                        // integrate over (-Inf, bound]
    epsabs=1e-5;
    epsrel=1e-5;
    limit=100;
    lenw=4*limit;
    iwork=(int *) R_alloc(limit, sizeof(int));
    work=(double *) R_alloc(lenw, sizeof(double));

    // Check the sign of the argument:
    if (x <= 0)
        Rdqagi(integr_pt,(void*)&df,&x,&inf,&epsabs,&epsrel,
               &integ,&abserr,&neval,&ier,&limit,&lenw,&last,iwork,work);
    else {
        // Integrate over (-Inf, 0] and (-Inf, -x], then combine by symmetry:
        q=-x;
        Rdqagi(integr_pt,(void*)&df,&origin,&inf,&epsabs,&epsrel,
               &tinteg,&abserr,&neval,&ier,&limit,&lenw,&last,iwork,work);
        Rdqagi(integr_pt,(void*)&df,&q,&inf,&epsabs,&epsrel,&integ,&abserr,
               &neval,&ier,&limit,&lenw,&last,iwork,work);
        integ=2*tinteg-integ;
    }
    res=0.5*pt(x,df,1,0)*(digamma(0.5*(df+1))-digamma(0.5*df)-1/df)+integ;
    return res;
}
double compute_likelihood(document* doc, lda_model* model, double** phi, double* var_gamma) {
    double likelihood = 0, digsum = 0, var_gamma_sum = 0, dig[model->num_topics];
    int k = 0, n = 0, index = 0;
    memset(dig, 0, sizeof(dig));

    for (k = 0; k < model->num_topics; k++) {
        dig[k] = digamma(var_gamma[k]);
        var_gamma_sum += var_gamma[k];
    }
    digsum = digamma(var_gamma_sum);

    likelihood = lgamma(model->alpha * model->num_topics)
        - model->num_topics * lgamma(model->alpha)
        - lgamma(var_gamma_sum);

    for (k = 0; k < model->num_topics; k++) {
        likelihood += (model->alpha - 1)*(dig[k] - digsum)
            + lgamma(var_gamma[k])
            - (var_gamma[k] - 1)*(dig[k] - digsum);
        for (n = 0; n < doc->length; n++) {
            if (phi[n][k] > 0) {
                index = doc->words[n];
                likelihood += doc->counts[n]
                    * (phi[n][k]*((dig[k] - digsum)
                                  - log(phi[n][k])
                                  + model->log_prob_w[k][index]));
            }
        }
    }
    return(likelihood);
}
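This is the per-document variational bound for LDA with a symmetric prior $\alpha$; writing $\Psi_k = \psi(\gamma_k) - \psi(\sum_j \gamma_j)$ for $\mathbb{E}_q[\log\theta_k]$, the function above evaluates

$$\mathcal{L} = \log\Gamma(K\alpha) - K\log\Gamma(\alpha) + \sum_k(\alpha-1)\Psi_k + \sum_{n,k}\phi_{nk}\big(\Psi_k + \log\beta_{k,w_n}\big) - \log\Gamma\Big(\sum_k\gamma_k\Big) + \sum_k\log\Gamma(\gamma_k) - \sum_k(\gamma_k-1)\Psi_k - \sum_{n,k}\phi_{nk}\log\phi_{nk},$$

with word multiplicities folded in through `doc->counts[n]`.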
double doc_e_step(document* doc, double* gamma, double** phi,
                  lda_model* model, lda_suffstats* ss) {
    double likelihood;
    int n, k;
    short error = 0;

    // posterior inference
    likelihood = lda_inference(doc, model, gamma, phi, &error);
    if (error) { likelihood = 0.0; }

    // update sufficient statistics
    double gamma_sum = 0;
    for (k = 0; k < model->num_topics; k++) {
        gamma_sum += gamma[k];
        ss->alpha_suffstats += digamma(gamma[k]);
    }
    ss->alpha_suffstats -= model->num_topics * digamma(gamma_sum);

    for (n = 0; n < doc->length; n++) {
        for (k = 0; k < model->num_topics; k++) {
            ss->class_word[k][doc->words[n]] += doc->counts[n]*phi[n][k];
            ss->class_total[k] += doc->counts[n]*phi[n][k];
        }
    }
    ss->num_docs = ss->num_docs + 1;
    return(likelihood);
}
double lda_inference(document* doc, lda_model* model, double* var_gamma, double** phi) {
    double converged = 1;
    double phisum = 0, likelihood = 0;
    double likelihood_old = 0, oldphi[model->num_topics];
    int k, n, var_iter;
    double digamma_gam[model->num_topics];

    // compute posterior dirichlet
    for (k = 0; k < model->num_topics; k++) {
        var_gamma[k] = model->alpha + (doc->total/((double) model->num_topics));
        digamma_gam[k] = digamma(var_gamma[k]);
        for (n = 0; n < doc->length; n++)
            phi[n][k] = 1.0/model->num_topics;
    }

    var_iter = 0;
    while ((converged > VAR_CONVERGED) &&
           ((var_iter < VAR_MAX_ITER) || (VAR_MAX_ITER == -1))) {
        var_iter++;
        for (n = 0; n < doc->length; n++) {
            phisum = 0;
            for (k = 0; k < model->num_topics; k++) {
                oldphi[k] = phi[n][k];
                phi[n][k] = digamma_gam[k] + model->log_prob_w[k][doc->words[n]];
                if (k > 0)
                    phisum = log_sum(phisum, phi[n][k]);
                else
                    phisum = phi[n][k];  // note, phi is in log space
            }
            for (k = 0; k < model->num_topics; k++) {
                phi[n][k] = exp(phi[n][k] - phisum);
                var_gamma[k] = var_gamma[k] + doc->counts[n]*(phi[n][k] - oldphi[k]);
                // !!! a lot of extra digamma's here because of how we're computing it
                // !!! but its more automatically updated too.
                digamma_gam[k] = digamma(var_gamma[k]);
            }
        }

        likelihood = compute_likelihood(doc, model, phi, var_gamma);
        assert(!isnan(likelihood));
        converged = (likelihood_old - likelihood) / likelihood_old;
        likelihood_old = likelihood;

        // printf("[LDA INF] %8.5f %1.3e\n", likelihood, converged);
    }
    return(likelihood);
}
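lda_inference accumulates the phi normalizer in log space through a log_sum helper that is not shown in this collection; a minimal sketch of the usual numerically stable form (the project's own utility may differ in details):

// log_sum(log_a, log_b) ~= log(exp(log_a) + exp(log_b)),
// factored so the exp() argument is never positive and cannot overflow.
double log_sum(double log_a, double log_b) {
    if (log_a < log_b)
        return log_b + log(1 + exp(log_a - log_b));
    else
        return log_a + log(1 + exp(log_b - log_a));
}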
// First order derivative of the Student-t probability density function
// with respect to the degree of freedom with argument g(x):
double ddf_t_dt(double x, double clc, double df, double somc2) {
    double df1=df+1, res=0.0, nu=df-1, y=0.0;
    y = 1 + pow(x,2)/df;
    res = 0.5*dt(x,df,0)*(digamma(0.5*df1) - digamma(0.5*df)
                          - log(y) - 1/df + 2*df1*x*clc/nu/sqrt(df)/somc2/y);
    return res;
}
// First order derivative of the first order derivative of the
// Student-t probability density function (with respect to the arg)
// with respect to the degree of freedom with argument g(x):
double ddf_t_d1x_dt(double x, double clc, double df, double somc2) {
    double df1=df+1, nu=df-1, res=0.0, sdf=sqrt(df), y=0.0;
    y = 1 + pow(x,2)/df;
    res = 0.5*d1x_dt(x,df)*(digamma(0.5*df1) - digamma(0.5*df)
                            - log(y) - 2/df + 2*x*clc*(df1+2)/nu/sdf/somc2/y
                            + 2/df1 - 2*clc*sdf/nu/x/somc2);
    return res;
}
void fix_lambda(int ncentroids, long datalen, double *prior_alpha, double *log_lambda) {
    register int i;
    double correction;

    /* note digamma(a) - digamma(1 + a) = -1/a */
    correction = log(1 - exp(digamma(*prior_alpha) - digamma(1 + *prior_alpha)));
    for (i = 0; i < datalen; i++) {
        log_lambda[(ncentroids-1)*datalen + i] -= correction;
    }
    return;
}
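Since $\psi(1+\alpha) = \psi(\alpha) + 1/\alpha$, the exponent has a closed form, which makes the correction easy to sanity-check (a simplification of the expression above, not a change to the code):

$$\text{correction} = \log\!\left(1 - e^{\psi(\alpha) - \psi(1+\alpha)}\right) = \log\!\left(1 - e^{-1/\alpha}\right).$$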
void diff_t_nu_nu(double* x, double* nu, double* out) {
    double xmax=0;
    double *inbeder_out;
    inbeder_out=Calloc(3,double);
    double t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14;
    double x_help;

    if (*x >= 0) {
        x_help = *x;
    } else {
        x_help = -*x;
    }

    xmax=(*nu)/((*nu)+x_help*x_help);
    t1=1.0/(x_help*x_help+*nu);
    t2=*nu/2.0;
    t3=0.5;
    inbeder(&xmax,&t2,&t3,inbeder_out);
    t4=(*nu+1.0)/2.0;
    t5=pow(*nu,*nu/2.0-1.0)*x_help;
    t6=pow(t1,t4);
    t7=beta(t2,0.5);
    t8=t5*t6;
    t9=*nu*t1;
    t11=digamma(0.5*(*nu));
    t12=digamma(0.5*(*nu)+0.5);
    t13=t11-t12;
    t14=1.0/t7;
    t10=-t1*t4 + (t2-1.0)/(*nu) + 0.5*log(t1) + 0.5*log(*nu);

    out[0] = -1.0/8.0*inbeder_out[2]
             + t8*t14*(-0.25*log(t9) + 0.5*t13 - 0.5*t10);
    if (*x < 0) {
        out[0] = -out[0];
    }
    Free(inbeder_out);
}
logweight<Real> operator()(logweight<Real> const& x) const {
    if (linear)
        return x;
    typedef logweight<Real> W;
    double xa = x.getReal() + alpha;
    const double floor = .0002;
    static const W dig_floor(digamma(floor), false);
    // Until we can compute digamma in logspace, this will be the answer;
    // and we can't ask for digamma(0), because it's negative inf,
    // but exp(-inf) = 0.
    if (xa < floor)
        return dig_floor*(xa/floor);
    // This is a mistake: the denominator of a sum of n things is supposed
    // to get (alpha*n + sum), not (alpha + sum).  But it seems to work
    // better (sometimes).
    return W(digamma(xa), false);
}
double ddf_dt(double x, double df) {
    double df1=0.0, res=0.0, x2=0.0, y=0.0;
    df1 = df + 1;
    x2 = pow(x, 2);
    y = 1 + x2 / df;
    res = dts(x, df) * (digamma(df1 / 2) - digamma(df / 2) - 1 / df
                        + df1 * x2 / pow(df, 2) / y - log(y)) / 2;
    return res;
}
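With $y = 1 + x^2/\nu$, the return value agrees with differentiating the Student-t density $f(x;\nu)$ directly, assuming `dts(x, df)` evaluates that density:

$$\frac{\partial f}{\partial \nu}(x;\nu) = \frac{f(x;\nu)}{2}\left[\psi\!\left(\frac{\nu+1}{2}\right) - \psi\!\left(\frac{\nu}{2}\right) - \frac{1}{\nu} + \frac{(\nu+1)\,x^2}{\nu^2\,y} - \log y\right].$$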
void UpdateBeta(vec& beta, mat& rho_m, int V, int K) {
    double NEWTON_THRESH = 0.00001;
    int MAX_ITER = 1000;
    double gamma = 0.001;   // step-size damping
    vec df(V, fill::zeros);
    vec g(V, fill::zeros);
    vec h(V, fill::zeros);
    int iter = 0;

    do {
        // compute the first derivative of the bound with respect to beta
        double digamma_beta = digamma(sum(beta));
        double digamma_theta = 0;
        for (int k = 0; k < K; k++) {
            digamma_theta += digamma(sum(rho_m.row(k)));
        }
        for (int w = 0; w < V; w++) {
            double temp = 0;
            for (int k = 0; k < K; k++) {
                temp += digamma(rho_m(k, w));
            }
            g(w) = K * (digamma_beta - digamma(beta(w))) + temp - digamma_theta;
        }
        cout << "this is g" << endl;
        cout << g.t() << endl;

        // compute the Hessian, which has the form H = diag(h) + z*1*1'
        // with h(w) = -K*trigamma(beta(w)) and z = K*trigamma(sum(beta))
        double trigamma_beta = trigamma(sum(beta));
        double z = K * trigamma_beta;
        for (int w = 0; w < V; w++) {
            h(w) = -K * trigamma(beta(w));
        }
        cout << "this is h" << endl;
        cout << h.t() << endl;

        // invert H against g via Sherman-Morrison: (H^{-1}g)(w) = (g(w) - c)/h(w)
        double c = sum(g / h) / (1 / z + sum(1 / h));
        for (int w = 0; w < V; w++) {
            df(w) = (g(w) - c) / h(w);
        }
        beta -= gamma * df;   // damped Newton step

        iter++;
        cout << "iteration: " << iter << endl;
        cout << beta.t() << endl;
    } while (iter < MAX_ITER && max(abs(df)) > NEWTON_THRESH);
    return;
}
double ddf_d1x_dt(double x, double df) {
    double df1=0.0, res=0.0, x2=0.0, xdf=0.0, y=0.0;
    df1 = df + 1;
    x2 = pow(x, 2);
    y = 1 + x2 / df;
    xdf = x / df;
    res = - dts(x, df) * xdf
          * (df1 / 2 * (digamma(df1 / 2) + pow(xdf, 2) * (df + 3) / y
                        - digamma(df / 2) - log(y) - 3 / df) + 1) / y;
    return res;
}
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {
    if (nrhs != 1)
        mexErrMsgTxt("Wrong number of input arguments.");
    if (nlhs > 1)
        mexErrMsgTxt("Too many output arguments.");
    {
        double *x, *y;
        const int *dims;
        int len, i;
        dims = mxGetDimensions(prhs[0]);
        len = dims[0]*dims[1];
        plhs[0] = mxCreateDoubleMatrix(dims[0], dims[1], mxREAL);
        x = mxGetPr(prhs[0]);
        y = mxGetPr(plhs[0]);
        /* apply digamma elementwise */
        for (i = 0; i < len; i++) {
            y[i] = digamma(x[i]);
        }
    }
    return;
}
void compute_lambda_statistics(t_tilda_var_model* model, double** expected_beta) {
    for (int i = 0; i < model->num_topics; ++i) {
        double lambda_sum = 0.0;
        for (int v = 0; v < model->num_terms; ++v) {
            digamma_lambda[i][v] = digamma(model->lambda[i][v]);
            lambda_sum += model->lambda[i][v];
        }
        digamma_lambda_sum[i] = digamma(lambda_sum);
        // mean of the variational Dirichlet: lambda_iv / sum_v lambda_iv
        for (int v = 0; v < model->num_terms; ++v) {
            expected_beta[i][v] = model->lambda[i][v] / lambda_sum;
        }
    }
}
double LDA::Infer(CorpusC &cor, int d, const LdaModel &m, VReal* ga, VVReal* phi) const {
  VReal digamma(m.num_topics);
  double likelihood_old = 0;
  double c = 1;
  InitVarParamter(cor, d, m, &digamma, ga, phi);
  for (int it = 1; (c > var_converged_) && (it < var_max_iter_); ++it) {
    for (size_t n = 0; n < cor.ULen(d); n++) {
      for (int k = 0; k < m.num_topics; k++) {
        (*phi)[n][k] = digamma[k] + m.log_prob_w[k][cor.Word(d, n)];
      }
      double log_phi_sum = LogPartition(phi->at(n));
      for (int k = 0; k < m.num_topics; k++) {
        (*phi)[n][k] = exp((*phi)[n][k] - log_phi_sum);
      }
    }
    for (size_t i = 0; i < ga->size(); i++) {
      ga->at(i) = m.alpha[i];
    }
    for (size_t n = 0; n < cor.ULen(d); n++) {
      for (int k = 0; k < m.num_topics; k++) {
        (*ga)[k] += cor.Count(d, n) * (*phi)[n][k];
        digamma[k] = DiGamma(ga->at(k));
      }
    }
    double likelihood = Likelihood(cor, d, m, *ga, *phi);
    assert(!isnan(likelihood));
    c = (likelihood_old - likelihood) / likelihood_old;
    likelihood_old = likelihood;
  }
  return likelihood_old;
}
inline var<AutodiffOrder, StrictSmoothness, ValidateIO>
lgamma(const var<AutodiffOrder, StrictSmoothness, ValidateIO>& input) {
    if (ValidateIO) validate_input(input.first_val(), "lgamma");

    const short partials_order = 3;
    const unsigned int n_inputs = 1;

    create_node<unary_var_node<AutodiffOrder, partials_order>>(n_inputs);

    double val = input.first_val();

    try {
        push_dual_numbers<AutodiffOrder, ValidateIO>(lgamma(val));
    } catch (nomad_error) {
        throw nomad_output_value_error("lgamma");
    }

    push_inputs(input.dual_numbers());

    try {
        if (AutodiffOrder >= 1) push_partials<ValidateIO>(digamma(val));
        if (AutodiffOrder >= 2) push_partials<ValidateIO>(trigamma(val));
        if (AutodiffOrder >= 3) push_partials<ValidateIO>(quadrigamma(val));
    } catch (nomad_error) {
        throw nomad_output_partial_error("lgamma");
    }

    return var<AutodiffOrder, StrictSmoothness, ValidateIO>(next_node_idx_ - 1);
}
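The three pushed partials are the successive derivatives of $\log\Gamma$; in this codebase's naming, `trigamma` and `quadrigamma` evidently denote $\psi_1$ and $\psi_2$:

$$\frac{d}{dx}\log\Gamma(x) = \psi(x), \qquad \frac{d^2}{dx^2}\log\Gamma(x) = \psi_1(x), \qquad \frac{d^3}{dx^3}\log\Gamma(x) = \psi_2(x).$$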
/* The digamma function is the derivative of gammaln.

   Reference:
    J Bernardo,
    Psi ( Digamma ) Function,
    Algorithm AS 103,
    Applied Statistics,
    Volume 25, Number 3, pages 315-317, 1976.

   From http://www.psc.edu/~burkardt/src/dirichlet/dirichlet.f
   (with modifications for negative numbers and extra precision)
*/
double digamma(double x)
{
  double result;
  static const double
    neginf = -1.0/0.0,
    c = 12,
    s = 1e-6,
    d1 = -0.57721566490153286,
    d2 = 1.6449340668482264365,  /* pi^2/6 */
    s3 = 1./12,
    s4 = 1./120,
    s5 = 1./252,
    s6 = 1./240,
    s7 = 1./132;
    /* s8 = 691/32760, */
    /* s9 = 1/12, */
    /* s10 = 3617/8160; */

  /* Illegal arguments */
  if((x == neginf) || isnan(x)) {
    return 0.0/0.0;
  }
  /* Singularities */
  if((x <= 0) && (floor(x) == x)) {
    return neginf;
  }
  /* Negative values */
  /* Use the reflection formula (Jeffrey 11.1.6):
   * digamma(-x) = digamma(x+1) + pi*cot(pi*x)
   *
   * This is related to the identity
   * digamma(-x) = digamma(x+1) - digamma(z) + digamma(1-z)
   * where z is the fractional part of x
   * For example:
   * digamma(-3.1) = 1/3.1 + 1/2.1 + 1/1.1 + 1/0.1 + digamma(1-0.1)
   *               = digamma(4.1) - digamma(0.1) + digamma(1-0.1)
   * Then we use
   * digamma(1-z) - digamma(z) = pi*cot(pi*z)
   */
  if(x < 0) {
    return digamma(1-x) + M_PI/tan(-M_PI*x);
  }
  /* Use Taylor series if argument <= S */
  if(x <= s) return d1 - 1/x + d2*x;
  /* Reduce to digamma(X + N) where (X + N) >= C */
  result = 0;
  while(x < c) {
    result -= 1/x;
    x++;
  }
  /* Use de Moivre's expansion if argument >= C */
  /* This expansion can be computed in Maple via asympt(Psi(x),x) */
  if(x >= c) {
    double r = 1/x;
    result += log(x) - 0.5*r;
    r *= r;
    result -= r * (s3 - r * (s4 - r * (s5 - r * (s6 - r * s7))));
  }
  return result;
}
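A quick sanity check against closed-form values, using $\psi(1) = -\gamma$, $\psi(1/2) = -\gamma - 2\ln 2$ (with $\gamma \approx 0.57721566490153286$), and the recurrence $\psi(x+1) = \psi(x) + 1/x$. This is a hypothetical standalone driver, assuming the digamma above is in scope:

#include <math.h>
#include <stdio.h>

int main(void) {
    const double euler_gamma = 0.57721566490153286;
    /* each printed value should be ~0 up to rounding error */
    printf("%g\n", digamma(1.0) + euler_gamma);
    printf("%g\n", digamma(0.5) + euler_gamma + 2*log(2.0));
    printf("%g\n", digamma(4.2) - digamma(3.2) - 1/3.2);
    return 0;
}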
double Tequ(double dof, void *pinfo) {
    MINFO *myinfo;
    myinfo = pinfo;
    double x = dof/(double)2;
    return (log(x) - digamma(x) + (double)1)*(*myinfo).stau + (*myinfo).sxuu;
}
double pcache_value(struct gcache_s *lgp, int j) {
    if (j <= 0)
        return 0;
    if (j >= GCACHE)
        return digamma(j + lgp->par) - lgp->lgpar;
    if (lgp->cache[j] == 0) {
        /* small j: unroll the recurrence digamma(x+1) = digamma(x) + 1/x */
        if (j == 1)
            lgp->cache[j] = 1/lgp->par;
        else if (j == 2)
            lgp->cache[j] = 1/lgp->par + 1/(1+lgp->par);
        else if (j == 3)
            lgp->cache[j] = 1/lgp->par + 1/(1+lgp->par) + 1/(2+lgp->par);
        else
            lgp->cache[j] = digamma(j + lgp->par) - lgp->lgpar;
    }
    return lgp->cache[j];
}
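The unrolled branches are the digamma recurrence $\psi(x+1) = \psi(x) + 1/x$ applied $j$ times:

$$\psi(j + a) - \psi(a) = \sum_{i=0}^{j-1} \frac{1}{a+i},$$

so for $j \le 3$ the cached value is exact and avoids two digamma evaluations, while larger $j$ fall back to the direct difference (`lgp->lgpar` presumably caches $\psi(a)$).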
void log_p_of_z_given_other_z_c(int datalen, long ncentroids,
                                double *post_gamma, double *log_lambda) {
    register int c, i;
    double E_log_p;

    for (c = 0; c < ncentroids; c++) {
        E_log_p = digamma(post_gamma[2*c])
                  - digamma(post_gamma[2*c] + post_gamma[2*c+1]);
        for (i = 0; i < c; i++) {
            E_log_p += digamma(post_gamma[2*i+1])
                       - digamma(post_gamma[2*i] + post_gamma[2*i+1]);
        }
        for (i = 0; i < datalen; i++) {
            log_lambda[c*datalen + i] = E_log_p;
        }
    }
    return;
}
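This reads as the standard stick-breaking expectation with per-component Beta posteriors $q(v_c) = \mathrm{Beta}(\gamma_{c,1}, \gamma_{c,2})$ stored interleaved in `post_gamma` (an interpretation of the code, with $\pi_c = v_c \prod_{i<c}(1-v_i)$):

$$\mathbb{E}_q[\log \pi_c] = \psi(\gamma_{c,1}) - \psi(\gamma_{c,1}+\gamma_{c,2}) + \sum_{i<c}\big[\psi(\gamma_{i,2}) - \psi(\gamma_{i,1}+\gamma_{i,2})\big].$$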
TEST(AgradRev, digamma) {
  AVAR a = 0.5;
  AVAR f = digamma(a);
  EXPECT_FLOAT_EQ(boost::math::digamma(0.5), f.val());

  AVEC x = createAVEC(a);
  VEC grad_f;
  f.grad(x, grad_f);
  EXPECT_FLOAT_EQ(4.9348022005446793094, grad_f[0]);
}
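The expected gradient is the trigamma function at $1/2$, which has a closed form:

$$\psi'(\tfrac12) = \psi_1(\tfrac12) = \frac{\pi^2}{2} \approx 4.9348022005446793094.$$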
logweight<Real> operator()(logweight<Real> const& x) const {
    if (linear)
        return x;
    double r = x.getReal();
    // Until we can compute digamma in logspace, this will be the answer;
    // and we can't ask for digamma(0), because it's negative inf,
    // but exp(-inf) = 0.
    if (r < .0001)
        return 0;
    logweight<Real> ret;
    ret.setLn(digamma(alpha + r));
    return ret;
}
void diff_dt_nu(double* x, double* nu, double* out) {
    double t1, t2, t3, t4, t6, t10, t11, t13, t14, t15, t16;

    t1=((*nu)+1.0)/2.0;
    t2=digamma(t1);
    t3=beta((*nu)*0.5,0.5);
    t4=sqrt(*nu);
    t6=digamma(0.5*(*nu));
    t10=-0.5/t3/t4*(t6-t2+1.0/(*nu));
    t11=1.0+((*x)*(*x))/(*nu);
    t13=pow(t11,-t1);
    t14=1.0/t3/t4;
    t15=log(t11);
    t16=-t1*(*x)*(*x)/(*nu)/(*nu)/t11;
    out[0]=t10*t13 + t14*(t13*(-0.5*t15-t16));
}
void diffPDF_nu_tCopula_new(double* u, double* v, int* n, double* param,
                            int* copula, double* out) {
    double out1=0, out2=0, x1, x2;
    int j=0, k=1;
    double t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15, t16, M, c;
    double rho = param[0];
    double nu = param[1];

    // terms that do not depend on the observation
    t1=digamma((nu+1.0)/2.0);
    t2=digamma(nu/2.0);
    t14=rho*rho;
    t3=0.5*log(1.0-t14);
    t4=(nu-2.0)/(2.0*nu);
    t5=0.5*log(nu);
    t6=-t1+t2+t3-t4-t5;
    t10=(nu+2.0)/2.0;

    for (j = 0; j < *n; j++) {
        LL(copula, &k, &u[j], &v[j], &rho, &nu, &c);
        c=exp(c);
        x1=qt(u[j],nu,1,0);
        x2=qt(v[j],nu,1,0);
        diffX_nu_tCopula(&x1, param, &out1);
        diffX_nu_tCopula(&x2, param, &out2);
        t7=1.0+2.0*x1*out1;
        t8=1.0+2.0*x2*out2;
        t15=x2*x2;
        t16=x1*x1;
        t9=(nu+1.0)/2.0*( t7/(nu+x1*x1) + t8/(nu+t15) );
        M=nu*(1.0-t14) + t16 + t15 - 2.0*rho*x1*x2;
        t11=1.0 - t14 + 2.0*x1*out1 + 2.0*x2*out2 - 2.0*rho*(x1*out2+x2*out1);
        t12=0.5*log((nu+t16)*(nu+t15));
        t13=0.5*log(M);
        out[j]=c*(t6 + t9 + t12 - t10*t11/M - t13);
    }
}
static void diffset(void) {
    int i;
    /* cache lgamma and its first four derivatives at integer arguments */
    for (i = 3; i < FDIM; i++) {
        fg[i]  = lgamma(i);
        fp0[i] = digamma(i);
        fp1[i] = trigamma(i);
        fp2[i] = tetragamma(i);
        fp3[i] = pentagamma(i);
    }
    fset = 1;
}
void compute_tempmat(long datalen, int dim1, int dim2, int ncentroids,
                     double **Temp, double *data1, int **data2_int,
                     double *Mu_bar, double *Mu_tilde, double **S2_x,
                     double **Ksi_log, double ***U_hat_table, double *Ns,
                     double implicit_noisevar, double *log_lambda) {
    register int i, k;
    long ind, j, t;
    double term;

    /* continuous dimensions: quadratic terms */
    for (i = 0; i < ncentroids; i++) {
        for (j = 0; j < datalen; j++) {
            Temp[i][j] = 0.0;
            for (k = 0; k < dim1; k++) {
                ind = k * ncentroids + i;
                Temp[i][j] += ((Mu_tilde[ind] + POW2(data1[k*datalen + j] - Mu_bar[ind])
                                + implicit_noisevar) / S2_x[i][k]) - Ksi_log[i][k];
            }
            Temp[i][j] /= 2.0;
        }
    }

    /* discrete dimensions: expected log probabilities via digamma */
    for (j = 0; j < dim2; j++) {
        for (i = 0; i < ncentroids; i++) {
            term = 0.0;
            for (k = 0; k < (int)(Ns[j]); k++) {
                term += U_hat_table[j][i][k];
                U_hat_table[j][i][k] = digamma(U_hat_table[j][i][k]);
            }
            term = digamma(term);
            for (t = 0; t < datalen; t++) {
                Temp[i][t] += (term - U_hat_table[j][i][data2_int[j][t]]);
            }
        }
    }

    for (i = 0; i < ncentroids; i++) {
        for (j = 0; j < datalen; j++) {
            log_lambda[i * datalen + j] += -dim1*log(2*M_PI)/2 - Temp[i][j];
        }
    }
    return;
}
double var_bayes::inference(const document &doc, std::vector<double>& var_gamma,
                            std::vector<std::vector<double>>& phi) {
    std::vector<double> digamma_gam(numTopics);
    for (int k = 0; k < numTopics; k++) {
        // cast guards against integer truncation if doc.count is integral
        var_gamma[k] = alpha.alpha[k] + double(doc.count)/numTopics;
    }

    int iteration = 0;
    double converged = 1;
    double phisum;
    std::vector<double> prev_gamma = std::vector<double>(numTopics);

    while ((converged > INF_CONV_THRESH) and (iteration < INF_MAX_ITER)) {
        iteration++;
        for (int k = 0; k < numTopics; k++) {
            digamma_gam[k] = digamma(var_gamma[k]);
            prev_gamma[k] = var_gamma[k];
            var_gamma[k] = alpha.alpha[k];
        }
        int n = 0;
        for (auto const& word_count : doc.wordCounts) {
            phisum = 0;
            for (int k = 0; k < numTopics; k++) {
                phi[n][k] = digamma_gam[k] + logProbW[k][word_count.first];
                if (k > 0) {
                    phisum = log_sum(phisum, phi[n][k]);
                } else {
                    phisum = phi[n][k];   // phi is in log space
                }
            }
            // Estimate gamma and phi
            for (int k = 0; k < numTopics; k++) {
                phi[n][k] = exp(phi[n][k] - phisum);
                var_gamma[k] += word_count.second*(phi[n][k]);
            }
            n++;
        }
        // convergence criterion: mean absolute change in gamma
        converged = 0;
        for (int k = 0; k < numTopics; ++k) {
            converged += fabs(prev_gamma[k] - var_gamma[k]);
        }
        converged /= numTopics;
    }
    return compute_likelihood(doc, var_gamma, phi);
}