/**
 * Resamples the per-topic belief stored on one edge and returns a copy of it.
 *
 * The edge's current belief is first subtracted from the document-topic
 * counts, the word-topic counts and GLOBAL_TOPIC_COUNT; a fresh unnormalised
 * weight is then computed for every topic from those reduced counts.
 *
 *  - If the previous belief summed to zero, a single topic is drawn with
 *    graphlab::random::multinomial and receives the whole edge count.
 *  - Otherwise the weights are rescaled so they sum to the edge count and are
 *    added back into the three count tables.
 *
 * @param context  engine context (not used by this method)
 * @param vertex   the vertex the gather is running on
 * @param edge     the edge whose belief is resampled in place
 * @return a copy of the edge's updated belief
 */
factor_type gather(icontext_type& context, const vertex_type& vertex,
                   edge_type& edge) const {
  vertex_type neighbor = get_other_vertex(edge, vertex);
  // VIOLATING THE ABSTRACTION! A non-const vertex handle is built from the
  // const reference so that the vertex data can be modified during gather.
  vertex_data& self_data = graph_type::vertex_type(vertex).data();
  // VIOLATING THE ABSTRACTION! The neighbour's data is modified as well.
  vertex_data& neighbor_data = neighbor.data();

  // Pick out which endpoint holds the document counts and which the word
  // counts.
  factor_type& doc_topic_count =
      is_doc(vertex) ? self_data.factor : neighbor_data.factor;
  factor_type& word_topic_count =
      is_word(vertex) ? self_data.factor : neighbor_data.factor;
  ASSERT_EQ(doc_topic_count.size(), NTOPICS);
  ASSERT_EQ(word_topic_count.size(), NTOPICS);

  // Run the actual gibbs sampling.
  factor_type& belief = edge.data().belief;
  const uint32_t count = edge.data().count;

  // Resample the topics.
  double previous_total = 0;
  double weight_total = 0;
  for (size_t topic = 0; topic < NTOPICS; ++topic) {
    previous_total += belief[topic];

    // Remove this edge's current contribution from every count table.
    doc_topic_count[topic] -= belief[topic];
    word_topic_count[topic] -= belief[topic];
    GLOBAL_TOPIC_COUNT[topic] -= belief[topic];

    // Clamp each reduced count at zero before it is used.
    const double n_dt =
        std::max(count_type(doc_topic_count[topic]), count_type(0));
    ASSERT_GE(n_dt, 0);
    const double n_wt =
        std::max(count_type(word_topic_count[topic]), count_type(0));
    ASSERT_GE(n_wt, 0);
    const double n_t =
        std::max(count_type(GLOBAL_TOPIC_COUNT[topic]), count_type(0));
    ASSERT_GE(n_t, 0);

    // Unnormalised weight of this topic.
    const double doc_term = ALPHA + n_dt;
    const double word_term = BETA + n_wt;
    const double normalizer = BETA * NWORDS + n_t;
    belief[topic] = doc_term * word_term / normalizer;
    weight_total += belief[topic];
  }  // End of loop over each topic
  ASSERT_GT(weight_total, 0);

  if (previous_total == 0) {
    // The edge carried no belief yet: draw one topic and give it the whole
    // count.
    // NOTE(review): the count tables are not incremented on this path;
    // presumably they are rebuilt from the returned factors elsewhere --
    // confirm.
    size_t drawn_topic = graphlab::random::multinomial(belief);
    for (size_t topic = 0; topic < NTOPICS; ++topic) belief[topic] = 0;
    belief[drawn_topic] = count;
  } else {
    // Rescale the weights to sum to the edge count and put the new
    // contribution back into every count table.
    for (size_t topic = 0; topic < NTOPICS; ++topic) {
      belief[topic] = count * (belief[topic] / weight_total);
      doc_topic_count[topic] += belief[topic];
      word_topic_count[topic] += belief[topic];
      GLOBAL_TOPIC_COUNT[topic] += belief[topic];
    }
  }
  return belief;
}  // end of gather
/**
 * Adds this vertex's per-topic estimate into the tables held by env_inst.
 *
 * The vertex payload is parsed into a vertex_data copy. For a document
 * vertex, every topic k adds
 *   (n[k] + ALPHA) / (ndsum[doc] + NTOPICS * ALPHA)
 * to env_inst.theta[doc][k], where doc = k2id(v.id). For any other vertex,
 * every topic k adds
 *   (n[k] + BETA) / (nwsum[k] + NWORDS * BETA)
 * to env_inst.phi[k][v.id].
 *
 * @param context  engine context (not used by this method)
 * @param vertex   the vertex being applied
 * @param total    gathered value (not used by this method)
 */
void apply(icontext_type& context, vertex_type& vertex,
           const gather_type& total) {
  vertex_data v = vertex.parse<vertex_data>();

  if (!is_doc(vertex)) {
    // Non-document vertex: accumulate into phi, indexed by v.id directly.
    for (int topic = 0; topic < env_inst.NTOPICS; ++topic) {
      env_inst.phi[topic][v.id] +=
          (v.n[topic] + env_inst.BETA) /
          (env_inst.nwsum[topic] + env_inst.NWORDS * env_inst.BETA);
    }
    return;
  }

  // Document vertex: accumulate into theta, indexed through k2id(v.id).
  for (int topic = 0; topic < env_inst.NTOPICS; ++topic) {
    env_inst.theta[k2id(v.id)][topic] +=
        (v.n[topic] + env_inst.ALPHA) /
        (env_inst.ndsum[k2id(v.id)] + env_inst.NTOPICS * env_inst.ALPHA);
  }
}