factor_type gather(icontext_type& context, const vertex_type& vertex, 
                    edge_type& edge) const {
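    // Edges connect a document vertex to a word vertex; locate the
    // endpoint on the other side so both counts can be updated below.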
   vertex_type other_vertex = get_other_vertex(edge, vertex);
   // VIOLATING THE ABSTRACTION!
   vertex_data& vdata = graph_type::vertex_type(vertex).data();
   // VIOLATING THE ABSTRACTION!
   vertex_data& other_vdata = other_vertex.data();
   factor_type& doc_topic_count = 
     is_doc(vertex) ? vdata.factor : other_vdata.factor;
   factor_type& word_topic_count = 
     is_word(vertex) ? vdata.factor : other_vdata.factor;
   ASSERT_EQ(doc_topic_count.size(), NTOPICS);
   ASSERT_EQ(word_topic_count.size(), NTOPICS);
    // Run the actual Gibbs sampling
   factor_type& belief = edge.data().belief;
   const uint32_t count = edge.data().count;
   // Resample the topics
   double sum = 0, old_sum = 0;
   for(size_t t = 0; t < NTOPICS; ++t) {
     old_sum += belief[t];
     doc_topic_count[t] -= belief[t];
     word_topic_count[t] -= belief[t];
     GLOBAL_TOPIC_COUNT[t] -= belief[t];
     const double n_dt = 
       std::max(count_type(doc_topic_count[t]), count_type(0));
     ASSERT_GE(n_dt, 0);
     const double n_wt = 
       std::max(count_type(word_topic_count[t]), count_type(0)); 
     ASSERT_GE(n_wt, 0);
     const double n_t  = 
       std::max(count_type(GLOBAL_TOPIC_COUNT[t]), count_type(0)); 
     ASSERT_GE(n_t, 0);
     belief[t] = (ALPHA + n_dt) * (BETA + n_wt) / (BETA * NWORDS + n_t);
     sum += belief[t];
    } // End of loop over topics
   ASSERT_GT(sum, 0);
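    // If this edge carried no previous topic mass (old_sum == 0), draw one
    // hard multinomial sample and assign the full token count to that topic.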
   if(old_sum == 0) {
     size_t asg = graphlab::random::multinomial(belief);
     for(size_t i = 0; i < NTOPICS; ++i) belief[i] = 0;
     belief[asg] = count;
     return belief;
   }
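    // Otherwise spread the token count across topics in proportion to the
    // resampled belief and add the new soft counts back into the tables.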
   for(size_t t = 0; t < NTOPICS; ++t) {
     belief[t] = count * (belief[t]/sum);
     doc_topic_count[t] += belief[t];
     word_topic_count[t] += belief[t];
     GLOBAL_TOPIC_COUNT[t] += belief[t];
   }
   return belief;
 } // end of gather
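The per-topic weight computed in the loop above corresponds to the (unnormalized) collapsed Gibbs conditional for LDA, which the line belief[t] = (ALPHA + n_dt) * (BETA + n_wt) / (BETA * NWORDS + n_t) implements directly:

\[
p(z = t \mid d, w) \;\propto\; (\alpha + n_{d,t}) \, \frac{\beta + n_{w,t}}{\beta V + n_t}
\]

where n_{d,t}, n_{w,t}, and n_t are the document-topic, word-topic, and global topic counts with the current token's contribution removed (the subtractions at the top of the loop), alpha = ALPHA, beta = BETA, and V = NWORDS is the vocabulary size.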
Example #2
void apply(icontext_type& context, vertex_type& vertex,
           const gather_type& total) {
  vertex_data v = vertex.parse<vertex_data>();
  if (is_doc(vertex)) {
    // Accumulate this document's smoothed topic distribution (theta).
    for (int k = 0; k < env_inst.NTOPICS; k++) {
      env_inst.theta[k2id(v.id)][k] +=
        (v.n[k] + env_inst.ALPHA) /
        (env_inst.ndsum[k2id(v.id)] + env_inst.NTOPICS * env_inst.ALPHA);
    }
  } else {
    // Accumulate the smoothed probability of this word under each topic (phi).
    for (int k = 0; k < env_inst.NTOPICS; k++) {
      env_inst.phi[k][v.id] +=
        (v.n[k] + env_inst.BETA) /
        (env_inst.nwsum[k] + env_inst.NWORDS * env_inst.BETA);
    }
  }
} // end of apply
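For reference, this apply step accumulates the usual smoothed point estimates of the document-topic distribution theta and the topic-word distribution phi:

\[
\theta_{d,k} = \frac{n_{d,k} + \alpha}{n_d + K\alpha},
\qquad
\phi_{k,w} = \frac{n_{w,k} + \beta}{n_k + V\beta}
\]

with K = NTOPICS and V = NWORDS; the use of += suggests these estimates are averaged over multiple sampling sweeps rather than computed once.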