void LDA::RunEM(const Str &type, CorpusC &train, CorpusC &test, LdaModel* m) { LdaSuffStats ss(*m); if (type == "seeded") { ss.CorpusInitSS(train, *m); } else if (type == "random") { ss.RandomInitSS(*m); } LdaMLE(0, ss, m); double converged = 1; double likelihood_old = 0; for (int i = 0; i < em_max_iter_; i++) { VVReal gamma(train.Len()); VVVReal phi(train.Len()); VReal likelihood(train.Len()); #pragma omp parallel for for (size_t d = 0; d < train.Len(); d++) { likelihood[d] = Infer(train, d, *m, &gamma[d], &phi[d]); } double likelihoods = 0; ss.InitSS(*m, 0); for (size_t d = 0; d<train.Len(); d++) { double gamma_sum = 0; for (int k = 0; k < m->num_topics; k++) { gamma_sum += gamma[d][k]; } for (int k = 0; k < m->num_topics; k++) { ss.alpha_suffstats[k] += DiGamma(gamma[d][k]) - DiGamma(gamma_sum); } for (size_t n = 0; n < train.ULen(d); n++) { for (int k = 0; k < m->num_topics; k++) { ss.class_word[k][train.Word(d, n)] += train.Count(d, n) * phi[d][n][k]; ss.class_total[k] += train.Count(d, n) * phi[d][n][k]; } } ss.num_docs = ss.num_docs + 1; likelihoods += likelihood[d]; } LdaMLE(estimate_alpha_, ss, m); converged = (likelihood_old - likelihoods) / (likelihood_old); if (converged < 0) { var_max_iter_ = var_max_iter_ * 2; } likelihood_old = likelihoods; if (i % 10 == 0) { VVReal gamma2; VVVReal phi2; LOG(INFO) << "em " << i << " perplexity:" << Infer(test, *m, &gamma2, &phi2); } } }
void ProofNode::Prove() { //make any inferences if possible Infer(*this); //recurse down both branches if (_left) _left->Prove(); if (_right) _right->Prove(); //print the finished graph }
// Corpus-level inference.
//
// Calls the per-document Infer on every document of `test` (parallel over
// documents, each with its own scratch MGVar), adds the returned values up in
// a separate sequential pass in document order, and returns
// exp(-total / test.TWordsNum()).
double VarMGCTM::Infer(CorpusC &test, MGCTMC &m) {
  const size_t num_docs = test.Len();
  VReal doc_scores(num_docs);

  #pragma omp parallel for
  for (size_t doc = 0; doc < num_docs; ++doc) {
    MGVar scratch;  // variational state is discarded; only the score is kept
    doc_scores[doc] = Infer(test.docs[doc], m, &scratch);
  }

  double total = 0.0;
  for (size_t doc = 0; doc < num_docs; ++doc) {
    total += doc_scores[doc];
  }

  return exp(- total / test.TWordsNum());
}
/*****
E-step: run inference over the whole corpus, then fold the result into the
sufficient statistics.
  - var.phi[d] is indexed (topic, word position in document d).
  - ss->topic and ss->topic_sum are incremented by count * phi; they are not
    reset here.
  - ss->z_bar is resized to (topic count x document count) and filled column
    by column from var.z_bar.
*****/
void VarRTM::EStep(CorpusC &cor, RTMC &m, RTMSuffStats* ss) const {
  RTMVar var;
  Infer(cor, m, &var);

  ss->z_bar.resize(m.TopicNum(), cor.Len());

  const size_t num_docs = cor.Len();
  for (size_t doc = 0; doc < num_docs; ++doc) {
    const size_t num_unique = cor.ULen(doc);
    for (size_t pos = 0; pos < num_unique; ++pos) {
      for (int topic = 0; topic < m.TopicNum(); ++topic) {
        // The same count-weighted responsibility goes into the per-word
        // table and the per-topic total.
        const double weighted = cor.Count(doc, pos) * var.phi[doc](topic, pos);
        ss->topic(topic, cor.Word(doc, pos)) += weighted;
        ss->topic_sum[topic] += weighted;
      }
    }
    ss->z_bar.col(doc) = var.z_bar.col(doc);
  }
}
// Variational EM over the member corpus cor_.
//
// Initialises the sufficient statistics from cor_, performs one M-step, then
// runs converged_.em_max_iter_ iterations of:
//   1. per-document Infer, parallel over documents;
//   2. sequential accumulation of global-topic, local-topic and pi statistics;
//   3. MStep, a log of m->pi, and OutputFile with one eta string per document
//      joined by "\n".
// `test` is referenced only by the disabled perplexity log at the end of the
// loop body.
void VarMGCTM::RunEM(CorpusC &test, MGCTM* m) {
  MGSS ss;
  ss.CorpusInit(cor_, *m);
  MStep(ss, m);
  LOG(INFO) << m->pi.transpose();

  for (int iter = 0; iter < converged_.em_max_iter_; ++iter) {
    // E-step: one MGVar per document, filled in parallel.
    std::vector<MGVar> vars(cor_.Len());
    VReal likelihoods(cor_.Len());
    #pragma omp parallel for
    for (size_t d = 0; d < cor_.Len(); ++d) {
      likelihoods[d] = Infer(cor_.docs[d], *m, &vars[d]);
    }

    // Summed per-document likelihood; accumulated but not read afterwards.
    double likelihood = 0;
    VStr etas(cor_.Len());
    ss.SetZero(m->GTopicNum(), m->LTopicNum1(), m->LTopicNum2(), m->TermNum());

    for (size_t d = 0; d < cor_.Len(); ++d) {
      DocC &doc = cor_.docs[d];
      MGVar &var = vars[d];

      // Global topics: weight is count * g_z * (1 - delta).
      for (size_t n = 0; n < doc.ULen(); ++n) {
        for (int k = 0; k < m->GTopicNum(); ++k) {
          const double g_weight =
              doc.Count(n)*var.g_z(k, n)*(1 - var.delta[n]);
          ss.g_topic(k, doc.Word(n)) += g_weight;
          ss.g_topic_sum[k] += g_weight;
        }
      }

      // Local topics of group j: weight is count * l_z[j] * delta * eta[j].
      // The pi statistic for group j is updated in the same pass.
      for (int j = 0; j < m->LTopicNum1(); ++j) {
        for (size_t n = 0; n < doc.ULen(); ++n) {
          for (int k = 0; k < m->LTopicNum2(); ++k) {
            const double l_weight =
                doc.Count(n)*var.l_z[j](k, n)*var.delta[n]*var.eta[j];
            ss.l_topic[j](k, doc.Word(n)) += l_weight;
            ss.l_topic_sum(k, j) += l_weight;
          }
        }
        ss.pi[j] += var.eta[j];
      }

      etas[d] = EVecToStr(var.eta);
      likelihood += likelihoods[d];
    }

    // M-step and per-iteration output.
    MStep(ss, m);
    LOG(INFO) << m->pi.transpose();
    OutputFile(*m, Join(etas,"\n"), iter);
    // LOG(INFO) <<"perplexity: " <<Infer(test,*m);
  }
}
// Corpus-level inference.
//
// Resizes *ga and *phi to one entry per document, runs the per-document Infer
// on every document of `cor` (parallel over documents) so that entry d of
// each receives document d's result, then returns
// exp(-(sum of per-document values) / cor.TWordsNum()).
double LDA::Infer(CorpusC &cor, const LdaModel &m,VVReal* ga, VVVReal* phi) const {
  const size_t num_docs = cor.Len();
  ga->resize(num_docs);
  phi->resize(num_docs);

  VReal doc_scores(num_docs);
  #pragma omp parallel for
  for (size_t doc = 0; doc < num_docs; ++doc) {
    // Both containers were just resized, so index `doc` is always in range.
    doc_scores[doc] = Infer(cor, doc, m, &((*ga)[doc]), &((*phi)[doc]));
  }

  // Sequential reduction, in document order.
  double total = 0.0;
  for (size_t doc = 0; doc < num_docs; ++doc) {
    total += doc_scores[doc];
  }

  return exp(- total / cor.TWordsNum());
}
/*
 * Updates points-to information for the result of the quad `head`.
 *
 * Nothing is done unless the answer has size ISZ_ADDR and the TEMP_ANS bit is
 * set in head->temps. Otherwise the operands are classified and one of the
 * branches below runs. In the branch labels, C appears to denote an integer
 * constant (isintconst() is true), p a non-constant immediate resolved via
 * LookupMem(), and R an operand whose TEMP_LEFT / TEMP_RIGHT bit is set.
 *
 * The first group of branches requires opcode i_add with an immediate left
 * operand. The R+C branch also handles other opcodes by negating the constant.
 * Reads and writes the file-level `tempInfo` table and the `changed` flag.
 */
static void HandleAdd(QUAD *head)
{
    if ((head->ans->size == ISZ_ADDR) && (head->temps & TEMP_ANS))
    {
        if (head->dc.opcode == i_add && head->dc.left->mode == i_immed)
        {
            if (head->temps & TEMP_RIGHT)
            {
                if (isintconst(head->dc.left->offset))
                {
                    // C + R
                    // Walk the right temp's points-to list and build a scratch list
                    // holding each address displaced by the constant.
                    ALIASLIST *scan = tempInfo[head->dc.right->offset->v.sp->value.i]->pointsto;
                    ALIASLIST *result = NULL;
                    // Save `changed` so that building the scratch list leaves it untouched
                    // (presumably AliasUnion may set it -- confirm).
                    BOOLEAN xchanged = changed;
                    while (scan)
                    {
                        ALIASADDRESS *addr = LookupAddress(scan->address->name, scan->address->offset + head->dc.left->offset->v.i);
                        ALIASLIST *al = aAlloc(sizeof(ALIASLIST));
                        al->address = addr;
                        AliasUnion(&result, al);
                        scan = scan->next;
                    }
                    changed = xchanged;
                    // Merge the displaced addresses into the answer temp's points-to set.
                    AliasUnion(&tempInfo[head->ans->offset->v.sp->value.i]->pointsto, result);
                }
                else
                {
                    // p + R
                    if (head->dc.left->offset->type != en_labcon) // needed for exports
                    {
                        // One-element alias list for the named object at offset 0,
                        // passed to Infer with the answer and the right operand.
                        ALIASNAME *nm = LookupMem(head->dc.left->offset->v.sp->imvalue);
                        ALIASADDRESS *aa = LookupAddress(nm, 0);
                        ALIASLIST *al = aAlloc(sizeof(ALIASLIST));
                        al->address = aa;
                        Infer(head->ans, head->dc.right, al);
                    }
                }
            }
            else if (head->dc.right->mode == i_immed)
            {
                // Both operands are immediates.
                if (!isintconst(head->dc.left->offset) && head->dc.left->offset->type != en_labcon)
                {
                    // p + C
                    // The answer may point at the left object, at the right operand's offset.
                    ALIASNAME *nm = LookupMem(head->dc.left->offset->v.sp->imvalue);
                    ALIASADDRESS *aa = LookupAddress(nm, head->dc.right->offset->v.i);
                    ALIASLIST *al = aAlloc(sizeof(ALIASLIST));
                    al->address = aa;
                    AliasUnion(&tempInfo[head->ans->offset->v.sp->value.i]->pointsto,al);
                }
                else if (!isintconst(head->dc.right->offset) && head->dc.right->offset->type != en_labcon)
                {
                    // C + p
                    // Mirror of the case above with the operand roles swapped.
                    ALIASNAME *nm = LookupMem(head->dc.right->offset->v.sp->imvalue);
                    ALIASADDRESS *aa = LookupAddress(nm, head->dc.left->offset->v.i);
                    ALIASLIST *al = aAlloc(sizeof(ALIASLIST));
                    al->address = aa;
                    AliasUnion(&tempInfo[head->ans->offset->v.sp->value.i]->pointsto, al);
                }
            }
        }
        else if (head->dc.right->mode == i_immed)
        {
            if (head->temps & TEMP_LEFT)
            {
                if (isintconst(head->dc.right->offset))
                {
                    // R+C
                    // The displacement is the constant for i_add and its negation for any
                    // other opcode reaching this branch.
                    int c = head->dc.opcode == i_add ? 
head->dc.right->offset->v.i : -head->dc.right->offset->v.i;
                    ALIASLIST *scan = tempInfo[head->dc.left->offset->v.sp->value.i]->pointsto;
                    ALIASLIST *result = NULL;
                    // Same save/restore of `changed` around the scratch list as in C + R.
                    BOOLEAN xchanged = changed;
                    while (scan)
                    {
                        ALIASADDRESS *addr = LookupAddress(scan->address->name, scan->address->offset + c);
                        ALIASLIST *al = aAlloc(sizeof(ALIASLIST));
                        al->address = addr;
                        AliasUnion(&result, al);
                        scan = scan->next;
                    }
                    changed = xchanged;
                    AliasUnion(&tempInfo[head->ans->offset->v.sp->value.i]->pointsto, result);
                }
                else
                {
                    // R + p
                    if (head->dc.right->offset->type != en_labcon) // needed for exports
                    {
                        // One-element alias list for the named object at offset 0,
                        // passed to Infer with the answer and the left operand.
                        ALIASNAME *nm = LookupMem(head->dc.right->offset->v.sp->imvalue);
                        ALIASADDRESS *aa = LookupAddress(nm, 0);
                        ALIASLIST *al = aAlloc(sizeof(ALIASLIST));
                        al->address = aa;
                        Infer(head->ans, head->dc.left, al);
                    }
                }
            }
        }
        else if ((head ->temps & (TEMP_LEFT | TEMP_RIGHT)) == (TEMP_LEFT | TEMP_RIGHT))
        {
            // R+R
            ALIASLIST *src;
            IMODE *one = head->dc.left;
            IMODE *two = head->dc.right;
            // Swap so that an address-sized right operand ends up in `one`.
            if (two->size == ISZ_ADDR)
            {
                IMODE *three = one;
                one = two;
                two = three;
            }
            if (one->size == ISZ_ADDR)
            {
                // now one has the pointer, two has something else
                // Pass the pointer's points-to list to Infer with the answer and the other operand.
                src = tempInfo[one->offset->v.sp->value.i]->pointsto;
                Infer(head->ans, two, src);
            }
        }
    }
}