float rbm_predict(const vertex_data & usr, const vertex_data & mov, const float rating, double & prediction, void * extra) { return rbm_predict(rbm_user((vertex_data&)usr), rbm_movie((vertex_data&)mov), rating, prediction, NULL); }
/** * Vertex update function. */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { if (gcontext.iteration == 0) { if (is_user(vertex.id()) && vertex.num_outedges() > 0) { vertex_data& user = latent_factors_inmem[vertex.id()]; user.pvec = zeros(D*3); for(int e=0; e < vertex.num_outedges(); e++) { rbm_movie mov = latent_factors_inmem[vertex.edge(e)->vertex_id()]; float observation = vertex.edge(e)->get_data(); int r = (int)(observation/rbm_scaling); assert(r < rbm_bins); mov.bi[r]++; } } return; } else if (gcontext.iteration == 1) { if (vertex.num_inedges() > 0) { rbm_movie mov = latent_factors_inmem[vertex.id()]; setRand2(mov.w, D*rbm_bins, 0.001); for(int r = 0; r < rbm_bins; ++r) { mov.bi[r] /= (double)vertex.num_inedges(); mov.bi[r] = log(1E-9 + mov.bi[r]); if (mov.bi[r] > 1000) { assert(false); Rcpp::Rcerr<<"Numerical overflow" <<std::endl; } } } return; //done with initialization } //go over all user nodes if (is_user(vertex.id()) && vertex.num_outedges()) { vertex_data & user = latent_factors_inmem[vertex.id()]; user.pvec = zeros(3*D); rbm_user usr(user); vec v1 = zeros(vertex.num_outedges()); //go over all ratings for(int e=0; e < vertex.num_outedges(); e++) { float observation = vertex.edge(e)->get_data(); rbm_movie mov = latent_factors_inmem[vertex.edge(e)->vertex_id()]; int r = (int)(observation / rbm_scaling); assert(r < rbm_bins); for(int k=0; k < D; k++) { usr.h[k] += mov.w[D*r + k]; assert(!std::isnan(usr.h[k])); } } for(int k=0; k < D; k++) { usr.h[k] = sigmoid(usr.h[k]); if (drand48() < usr.h[k]) usr.h0[k] = 1; else usr.h0[k] = 0; } int i = 0; double prediction; for(int e=0; e < vertex.num_outedges(); e++) { rbm_movie mov = latent_factors_inmem[vertex.edge(e)->vertex_id()]; float observation = vertex.edge(e)->get_data(); predict1(usr, mov, observation, prediction); int vi = (int)(prediction / rbm_scaling); v1[i] = vi; i++; } i = 0; for(int e=0; e < vertex.num_outedges(); e++) { rbm_movie mov = latent_factors_inmem[vertex.edge(e)->vertex_id()]; int r = (int)v1[i]; for (int k=0; k< D; k++) { usr.h1[k] += mov.w[r*D+k]; } i++; } for (int k=0; k < D; k++) { usr.h1[k] = sigmoid(usr.h1[k]); if (drand48() < usr.h1[k]) usr.h1[k] = 1; else usr.h1[k] = 0; } i = 0; for(int e=0; e < vertex.num_outedges(); e++) { rbm_movie mov = latent_factors_inmem[vertex.edge(e)->vertex_id()]; float observation = vertex.edge(e)->get_data(); double prediction; rbm_predict(user, mov, observation, prediction, NULL); double pui = prediction / rbm_scaling; double rui = observation / rbm_scaling; rmse_vec[omp_get_thread_num()] += (pui - rui) * (pui - rui); //nn += 1.0; int vi0 = (int)(rui); int vi1 = (int)v1[i]; for (int k = 0; k < D; k++) { mov.w[D*vi0+k] += rbm_alpha * (usr.h0[k] - rbm_beta * mov.w[vi0*D+k]); assert(!std::isnan(mov.w[D*vi0+k])); mov.w[D*vi1+k] -= rbm_alpha * (usr.h1[k] + rbm_beta * mov.w[vi1*D+k]); assert(!std::isnan(mov.w[D*vi1+k])); } i++; } } }
/** * Vertex update function - computes the least square step */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { //compute only for user nodes if (vertex.id() >= std::min(M,(uint)end_user) || vertex.id() < (uint)start_user) return; vertex_data & vdata = latent_factors_inmem[vertex.id()]; int howmany = (int)(N*knn_sample_percent); assert(howmany > 0 ); if (vertex.num_outedges() == 0){ mymutex.lock(); users_without_ratings++; mymutex.unlock(); } vec distances = zeros(howmany); ivec indices = ivec::Zero(howmany); for (int i=0; i< howmany; i++){ indices[i]= -1; } std::vector<bool> curratings; curratings.resize(N); for(int e=0; e < vertex.num_edges(); e++) { //no need to calculate this rating since it is given in the training data reference assert(vertex.edge(e)->vertex_id() - M >= 0 && vertex.edge(e)->vertex_id() - M < N); curratings[vertex.edge(e)->vertex_id() - M] = true; } if (knn_sample_percent == 1.0){ for (uint i=M; i< M+N; i++){ if (curratings[i-M]) continue; vertex_data & other = latent_factors_inmem[i]; double dist; if (algo == SVDPP) svdpp_predict(vdata, other, 0, dist); else if (algo == BIASSGD) biassgd_predict(vdata, other, 0, dist); else if (algo == RBM) rbm_predict(vdata, other, 0, dist); else assert(false); indices[i-M] = i-M; distances[i-M] = dist + 1e-10; } } else for (int i=0; i<howmany; i++){ int random_other = ::randi(M, M+N-1); vertex_data & other = latent_factors_inmem[random_other]; double dist; if (algo == SVDPP) svdpp_predict(vdata, other, 0, dist); else if (algo == BIASSGD) biassgd_predict(vdata, other, 0, dist); else if (algo == RBM) rbm_predict(vdata, other, 0, dist); else assert(false); indices[i] = random_other-M; distances[i] = dist; } vec out_dist(num_ratings); ivec indices_sorted = reverse_sort_index2(distances, indices, out_dist, num_ratings); assert(indices_sorted.size() <= num_ratings); assert(out_dist.size() <= num_ratings); vdata.ids = indices_sorted; vdata.ratings = out_dist; if (debug) printf("Closest is: %d with distance %g\n", (int)vdata.ids[0], vdata.ratings[0]); if (vertex.id() % 1000 == 0) printf("Computing recommendations for user %d at time: %g\n", vertex.id()+1, mytimer.current_time()); }