/** * Vertex update function - computes the least square step */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { vertex_data & vdata = latent_factors_inmem[vertex.id()]; vdata.rmse = 0; mat XtX = mat::Zero(NLATENT, NLATENT); vec Xty = vec::Zero(NLATENT); bool compute_rmse = is_user(vertex.id()); // Compute XtX and Xty (NOTE: unweighted) for(int e=0; e < vertex.num_edges(); e++) { float observation = vertex.edge(e)->get_data().weight; uint time = vertex.edge(e)->get_data().time; vertex_data & nbr_latent = latent_factors_inmem[vertex.edge(e)->vertex_id()]; vertex_data & time_node = latent_factors_inmem[time]; assert(time != vertex.id() && time != vertex.edge(e)->vertex_id()); Map<vec> X(nbr_latent.pvec, NLATENT); Map<vec> Y(time_node.pvec, NLATENT); vec XY = X.cwiseProduct(Y); Xty += XY * observation; XtX.triangularView<Eigen::Upper>() += XY * XY.transpose(); if (compute_rmse) { double prediction; vdata.rmse += als_tensor_predict(vdata, nbr_latent, time_node, observation, prediction); } } for(int i=0; i < NLATENT; i++) XtX(i,i) += (lambda); // * vertex.num_edges(); // Solve the least squares problem with eigen using Cholesky decomposition Map<vec> vdata_vec(vdata.pvec, NLATENT); vdata_vec = XtX.selfadjointView<Eigen::Upper>().ldlt().solve(Xty); }
/** * Vertex update function - computes the least square step */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { vertex_data & vdata = latent_factors_inmem[vertex.id()]; mat XtX = mat::Zero(D, D); vec Xty = vec::Zero(D); bool compute_rmse = (vertex.num_outedges() > 0); // Compute XtX and Xty (NOTE: unweighted) for(int e=0; e < vertex.num_edges(); e++) { float observation = vertex.edge(e)->get_data(); vertex_data & nbr_latent = latent_factors_inmem[vertex.edge(e)->vertex_id()]; Xty += nbr_latent.pvec * observation; XtX += nbr_latent.pvec * nbr_latent.pvec.transpose(); if (compute_rmse) { double prediction; rmse_vec[omp_get_thread_num()] += sparse_als_predict(vdata, nbr_latent, observation, prediction); } } double regularization = lambda; if (regnormal) lambda *= vertex.num_edges(); for(int i=0; i < D; i++) XtX(i,i) += regularization; bool isuser = vertex.id() < (uint)M; if (algorithm == SPARSE_BOTH_FACTORS || (algorithm == SPARSE_USR_FACTOR && isuser) || (algorithm == SPARSE_ITM_FACTOR && !isuser)){ double sparsity_level = 1.0; if (isuser) sparsity_level -= user_sparsity; else sparsity_level -= movie_sparsity; vdata.pvec = CoSaMP(XtX, Xty, (int)ceil(sparsity_level*(double)D), 10, 1e-4, D); } else vdata.pvec = XtX.selfadjointView<Eigen::Upper>().ldlt().solve(Xty); }
/**
 * Vertex update function for weighted ALS.
 *
 * Every edge contributes to the normal equations scaled by its `time`
 * field, which acts as the per-observation weight here. The resulting
 * ridge-regularized system is solved with LDLT and written into the
 * vertex's latent factor.
 */
void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
  vertex_data & self = latent_factors_inmem[vertex.id()];
  self.rmse = 0;

  mat gram = mat::Zero(NLATENT, NLATENT);
  vec rhs = vec::Zero(NLATENT);

  // Error is tracked only on vertices that own out-edges.
  const bool track_error = vertex.num_outedges() > 0;

  for (int k = 0; k < vertex.num_edges(); ++k) {
    const edge_data & rating = vertex.edge(k)->get_data();
    vertex_data & other = latent_factors_inmem[vertex.edge(k)->vertex_id()];
    Map<vec> factor(other.pvec, NLATENT);

    rhs += factor * rating.weight * rating.time;
    // Upper triangle only; consumed below through selfadjointView.
    gram.triangularView<Eigen::Upper>() += factor * factor.transpose() * rating.time;

    if (track_error) {
      double predicted;
      self.rmse += wals_predict(self, other, rating.weight, predicted) * rating.time;
    }
  }

  // Ridge term on the diagonal (not scaled by the edge count).
  for (int d = 0; d < NLATENT; ++d) {
    gram(d, d) += (lambda);
  }

  // Solve in place into the vertex's factor storage.
  Map<vec> solution(self.pvec, NLATENT);
  solution = gram.selfadjointView<Eigen::Upper>().ldlt().solve(rhs);
}
/** * Vertex update function. */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { //go over all user nodes if ( vertex.num_outedges() > 0){ vertex_data & user = latent_factors_inmem[vertex.id()]; //go over all ratings for(int e=0; e < vertex.num_edges(); e++) { float observation = vertex.edge(e)->get_data(); vertex_data & movie = latent_factors_inmem[vertex.edge(e)->vertex_id()]; double estScore; rmse_vec[omp_get_thread_num()] += sgd_predict(user, movie, observation, estScore); double err = observation - estScore; if (std::isnan(err) || std::isinf(err)) logstream(LOG_FATAL)<<"SGD got into numerical error. Please tune step size using --sgd_gamma and sgd_lambda" << std::endl; //NOTE: the following code is not thread safe, since potentially several //user nodes may updates this item gradient vector concurrently. However in practice it //did not matter in terms of accuracy on a multicore machine. //if you like to defend the code, you can define a global variable //mutex mymutex; // //and then do: mymutex.lock() movie.pvec += sgd_gamma*(err*user.pvec - sgd_lambda*movie.pvec); //and here add: mymutex.unlock(); user.pvec += sgd_gamma*(err*movie.pvec - sgd_lambda*user.pvec); } } }
/** * Vertex update function - computes the least square step */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { vertex_data & vdata = latent_factors_inmem[vertex.id()]; vdata.rmse = 0; mat XtX = mat::Zero(NLATENT, NLATENT); vec Xty = vec::Zero(NLATENT); bool compute_rmse = (vertex.num_outedges() > 0); // Compute XtX and Xty (NOTE: unweighted) for(int e=0; e < vertex.num_edges(); e++) { float observation = vertex.edge(e)->get_data(); vertex_data & nbr_latent = latent_factors_inmem[vertex.edge(e)->vertex_id()]; Map<vec> X(nbr_latent.pvec, NLATENT); Xty += X * observation; XtX += X * X.transpose(); if (compute_rmse) { double prediction; vdata.rmse += sparse_als_predict(vdata, nbr_latent, observation, prediction); } } for(int i=0; i < NLATENT; i++) XtX(i,i) += (lambda); // * vertex.num_edges(); bool isuser = vertex.id() < (uint)M; Map<vec> vdata_vec(vdata.pvec, NLATENT); if (algorithm == SPARSE_BOTH_FACTORS || (algorithm == SPARSE_USR_FACTOR && isuser) || (algorithm == SPARSE_ITM_FACTOR && !isuser)){ double sparsity_level = 1.0; if (isuser) sparsity_level -= user_sparsity; else sparsity_level -= movie_sparsity; vdata_vec = CoSaMP(XtX, Xty, ceil(sparsity_level*(double)NLATENT), 10, 1e-4, NLATENT); } else vdata_vec = XtX.selfadjointView<Eigen::Upper>().ldlt().solve(Xty); }
/** * Vertex update function. * On first iteration ,each vertex chooses a label = the vertex id. * On subsequent iterations, each vertex chooses the minimum of the neighbor's * label (and itself). */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { /* On subsequent iterations, find the minimum label of my neighbors */ if (!edge_count){ vid_t curmin = vertex_values[vertex.id()]; if (gcontext.iteration == 0 && vertex.num_edges() > 0){ mymutex.lock(); actual_vertices++; mymutex.unlock(); } for(int i=0; i < vertex.num_edges(); i++) { vid_t nblabel = neighbor_value(vertex.edge(i)); curmin = std::min(nblabel, curmin); } if (vertex_values[vertex.id()] > curmin) { changes++; set_data(vertex, curmin); } } else { vid_t curmin = vertex_values[vertex.id()]; for(int i=0; i < vertex.num_edges(); i++) { vid_t nblabel = neighbor_value(vertex.edge(i)); curmin = std::min(nblabel, curmin); if (vertex.edge(i)->vertex_id() > vertex.id()){ mymutex.lock(); state[curmin]++; mymutex.unlock(); } } } }
/** * Vertex update function - computes the least square step */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { vertex_data & vdata = latent_factors_inmem[vertex.id()]; bool isuser = vertex.id() < M; mat XtX = mat::Zero(D, D); vec Xty = vec::Zero(D); bool compute_rmse = (vertex.num_outedges() > 0); // Compute XtX and Xty (NOTE: unweighted) for(int e=0; e < vertex.num_edges(); e++) { const edge_data & edge = vertex.edge(e)->get_data(); float observation = edge.weight; vertex_data & nbr_latent = latent_factors_inmem[vertex.edge(e)->vertex_id()]; Xty += nbr_latent.pvec * observation; XtX.triangularView<Eigen::Upper>() += nbr_latent.pvec * nbr_latent.pvec.transpose(); if (compute_rmse) { double prediction; rmse_vec[omp_get_thread_num()] += pmf_predict(vdata, nbr_latent, observation, prediction, (void*)&edge.avgprd); vertex.edge(e)->set_data(edge); } } double regularization = lambda; if (regnormal) lambda *= vertex.num_edges(); for(int i=0; i < D; i++) XtX(i,i) += regularization; // Solve the least squares problem with eigen using Cholesky decomposition mat iAi_; bool ret =inv((isuser? A_U : A_V) + alpha * XtX, iAi_); assert(ret); vec mui_ = iAi_*((isuser? (A_U*mu_U) : (A_V*mu_V)) + alpha * Xty); vdata.pvec = mvnrndex(mui_, iAi_, D, 0); assert(vdata.pvec.size() == D); }
/** * Vertex update function. */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { //go over all samples (rows) if ( vertex.num_outedges() > 0){ assert(vertex.id() < M); vertex_data & row = latent_factors_inmem[vertex.id()]; assert(row.y == -1 || row.y == 1); if (debug) std::cout<<"Entered item " << vertex.id() << " y: " << row.y << std::endl; row.sigma = beta*beta; row.xT_mu = 0; //go over all features for(int e=0; e < vertex.num_outedges(); e++) { uint feature_id = vertex.edge(e)->vertex_id(); edge_data edge = vertex.edge(e)->get_data(); assert(sigma_ij[feature_id] > 0); assert(edge.x_ij == 1); /* compute equation (6) */ row.sigma += edge.x_ij * sigma_ij[feature_id]; /* compute the sum xT*w as needed in equations (7) and (8) */ row.xT_mu += edge.x_ij * mu_ij[feature_id]; } double prediction; double ret = ctr_predict(row, row, row.y, prediction); double predicted_target = prediction < 0 ? -1: 1; if ((predicted_target == -1 && row.y == 1) || (predicted_target == 1 && row.y == -1)) err_vec[omp_get_thread_num()] += 1.0; if (debug) std::cout<<"Prediction was: " << prediction << " real value: " << row.y << std::endl; liklihood_vec[omp_get_thread_num()] += ret; assert(row.sigma > 0); //go over all features for(int e=0; e < vertex.num_outedges(); e++) { edge_data edge = vertex.edge(e)->get_data(); uint feature_id = vertex.edge(e)->vertex_id(); assert(row.sigma > 0); double product = row.y * row.xT_mu / sqrt(row.sigma); mu_ij[feature_id] += (row.y * edge.x_ij * sigma_ij[feature_id] / sqrt(row.sigma)) * v(product); //if (debug) // std::cout<<"Added to edge: "<< vertex.edge(e)->vertex_id() << " product: " << product << " v(product): " << v(product) << " value: " <<(row.y * edge.x_ij * edge.sigma_ij * edge.sigma_ij / sqrt(row.sigma)) * v(product) << std::endl; double factor = 1.0 - (edge.x_ij * sigma_ij[feature_id] / row.sigma)*w(product); //if (debug) // std::cout<<"Added to edge: "<< vertex.edge(e)->vertex_id() << " product: " 
<< product << " w(product): " << w(product) << " factor: " << (1.0 - (edge.x_ij * edge.sigma_ij / row.sigma)*w(product)) << " sigma_ij " << edge.sigma_ij << " product: " << edge.sigma_ij * factor << std::endl; assert(factor > 0); sigma_ij[feature_id] *= factor; assert(sigma_ij[feature_id] > 0); } } }
// Helper virtual void set_latent_factor(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, latentvec_t &fact) { vertex.set_data(fact); for(int i=0; i < vertex.num_edges(); i++) { als_factor_and_weight factwght = vertex.edge(i)->get_data(); factwght.factor = fact; vertex.edge(i)->set_data(factwght); // Note that neighbors override the values they have written to edges. // This is ok, because vertices are always executed in same order. } }
/** * Vertex update function. */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { if ( vertex.num_outedges() > 0){ vertex_data & user = latent_factors_inmem[vertex.id()]; memset(&user.weight[0], 0, sizeof(double)*D); for(int e=0; e < vertex.num_outedges(); e++) { vertex_data & movie = latent_factors_inmem[vertex.edge(e)->vertex_id()]; user.weight += movie.weight; } // sqrt(|N(u)|) float usrNorm = double(1.0/sqrt(vertex.num_outedges())); //sqrt(|N(u)| * sum_j y_j user.weight *= usrNorm; vec step = zeros(D); // main algorithm, see Koren's paper, just below below equation (16) for(int e=0; e < vertex.num_outedges(); e++) { vertex_data & movie = latent_factors_inmem[vertex.edge(e)->vertex_id()]; float observation = vertex.edge(e)->get_data(); double estScore; rmse_vec[omp_get_thread_num()] += svdpp_predict(user, movie,observation, estScore); // e_ui = r_ui - \hat{r_ui} float err = observation - estScore; assert(!std::isnan(rmse_vec[omp_get_thread_num()])); vec itmFctr = movie.pvec; vec usrFctr = user.pvec; //q_i = q_i + gamma2 *(e_ui*(p_u + sqrt(N(U))\sum_j y_j) - gamma7 *q_i) for (int j=0; j< D; j++) movie.pvec[j] += svdpp.itmFctrStep*(err*(usrFctr[j] + user.weight[j]) - svdpp.itmFctrReg*itmFctr[j]); //p_u = p_u + gamma2 *(e_ui*q_i -gamma7 *p_u) for (int j=0; j< D; j++) user.pvec[j] += svdpp.usrFctrStep*(err *itmFctr[j] - svdpp.usrFctrReg*usrFctr[j]); step += err*itmFctr; //b_i = b_i + gamma1*(e_ui - gmma6 * b_i) movie.bias += svdpp.itmBiasStep*(err-svdpp.itmBiasReg* movie.bias); //b_u = b_u + gamma1*(e_ui - gamma6 * b_u) user.bias += svdpp.usrBiasStep*(err-svdpp.usrBiasReg* user.bias); } step *= float(svdpp.itmFctr2Step*usrNorm); //gamma7 double mult = svdpp.itmFctr2Step*svdpp.itmFctr2Reg; for(int e=0; e < vertex.num_edges(); e++) { vertex_data& movie = latent_factors_inmem[vertex.edge(e)->vertex_id()]; //y_j = y_j + gamma2*sqrt|N(u)| * q_i - gamma7 * y_j movie.weight += step - mult * movie.weight; } } }
/** * Vertex update function. */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { if (vertex.id() < (uint)mi.start || vertex.id() >= (uint)mi.end) return; vertex_data& user = latent_factors_inmem[vertex.id()]; bool rows = vertex.id() < (uint)info.get_start_node(false); if (info.is_square()) rows = mi.A_transpose; (void) rows; // unused assert(mi.r_offset >=0); //store previous value for convergence detection if (mi.prev_offset >= 0) user.pvec[mi.prev_offset ] = user.pvec[mi.r_offset]; double val = 0; assert(mi.x_offset >=0 || mi.y_offset>=0); /*** COMPUTE r = c*A*x ********/ if (mi.A_offset && mi.x_offset >= 0){ for(int e=0; e < vertex.num_edges(); e++) { const edge_data & edge = vertex.edge(e)->get_data(); const vertex_data & movie = latent_factors_inmem[vertex.edge(e)->vertex_id()]; val += (edge.weight * movie.pvec[mi.x_offset]); } if (info.is_square() && mi.use_diag)// add the diagonal term val += (/*mi.c**/ (user.A_ii+ regularization) * user.pvec[mi.x_offset]); val *= mi.c; } /***** COMPUTE r = c*I*x *****/ else if (!mi.A_offset && mi.x_offset >= 0){ val = mi.c*user.pvec[mi.x_offset]; } /**** COMPUTE r+= d*y (optional) ***/ if (mi.y_offset>= 0){ val += mi.d*user.pvec[mi.y_offset]; } /***** compute r = (... ) / div */ if (mi.div_offset >= 0){ val /= user.pvec[mi.div_offset]; } assert(mi.r_offset>=0 && mi.r_offset < user.pvec.size()); user.pvec[mi.r_offset] = val; } //end update
/** * calc distance between two items. * Let a be all the users rated item 1 * Let b be all the users rated item 2 * * 3) Using Pearson correlation * Dist_ab = (a - mean)*(b- mean)' / (std(a)*std(b)) * * 4) Using cosine similarity: * Dist_ab = (a*b) / sqrt(sum_sqr(a)) * sqrt(sum_sqr(b))) * * 5) Using chebychev: * Dist_ab = max(abs(a-b)) * * 6) Using manhatten distance: * Dist_ab = sum(abs(a-b)) * * 7) Using tanimoto: * Dist_ab = 1.0 - [(a*b) / (sum_sqr(a) + sum_sqr(b) - a*b)] * * 8) Using log likelihood similarity * Dist_ab = 1.0 - 1.0/(1.0 + loglikelihood) * * 9) Using Jaccard: * Dist_ab = intersect(a,b) / (size(a) + size(b) - intersect(a,b)) */ double calc_distance(graphchi_vertex<VertexDataType, EdgeDataType> &v, vid_t pivot, int distance_metric) { //assert(is_pivot(pivot)); //assert(is_item(pivot) && is_item(v.id())); dense_adj &pivot_edges = adjs[pivot - pivot_st]; int num_edges = v.num_edges(); dense_adj item_edges; for(int i=0; i < num_edges; i++){ set_new(item_edges.edges, v.edge(i)->vertexid, v.edge(i)->get_data()); } if (distance_metric == JACCARD_WEIGHT){ return calc_jaccard_weight_distance(pivot_edges.edges, item_edges.edges, get_val( pivot_edges.edges, v.id()), 0); } return NAN; }
/**
 * Computes the validation error contribution of a single vertex.
 *
 * Depending on `user_nodes`, only vertices with id below M or only those
 * at or above M are processed. The error returned by the prediction
 * function for every out-edge is added to the calling thread's slot of
 * validation_rmse_vec.
 */
void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
  // Skip vertices on the side that was not requested.
  if (user_nodes ? (vertex.id() >= M) : (vertex.id() < M))
    return;

  vertex_data & self = latent_factors_inmem[vertex.id()];

  for (int k = 0; k < vertex.num_outedges(); ++k) {
    const EdgeDataType & rating = vertex.edge(k)->get_data();
    vertex_data & other = latent_factors_inmem[vertex.edge(k)->vertex_id()];

    double predicted;
    double err = (*pprediction_func)(self, other, rating, predicted, NULL);
    // The range check assert(rmse <= pow(maxval - minval, 2)) is disabled.

    assert(validation_rmse_vec.size() > omp_get_thread_num());
    validation_rmse_vec[omp_get_thread_num()] += err;
  }
}
/**
 * Sanity-check pass, executed only on iteration 0.
 *
 * For a vertex that has edges and is both confirmed and reconfirmed, counts
 * the edges whose two labels are equal and whose label on this vertex's
 * side equals `root`, and asserts that there is more than one of them.
 */
void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
  if (gcontext.iteration != 0)
    return;
  if (vertex.num_edges() == 0)
    return;

  VertexDataType state = vertex.get_data();
  if (!(state.confirmed && state.reconfirmed))
    return;

  int root_links = 0;
  for (int k = 0; k < vertex.num_edges(); ++k) {
    graphchi_edge<EdgeDataType>* link = vertex.edge(k);
    bidirectional_label labels = link->get_data();
    if (labels.is_equal() && root == labels.my_label(vertex.id(), link->vertexid)) {
      root_links++;
    }
  }
  assert(root_links > 1);
}
/**
 * Grab pivot's adjacency list into memory.
 *
 * Copies the `up_weight` of every edge of the user pivot `v` into a
 * dense_adj, stores it in `adjs` at the pivot's slot (v.id() - pivot_st)
 * and atomically adds the number of edges to `grabbed_edges`.
 *
 * @param v  pivot vertex; must satisfy is_pivot() and is_user()
 * @return   number of edges of v
 */
int load_edges_into_memory(graphchi_vertex<uint32_t, edge_data> &v) {
  assert(is_pivot(v.id()));
  assert(is_user(v.id()));

  int num_edges = v.num_edges();

  dense_adj dadj;
  for(int i=0; i<num_edges; i++)
    set_new( dadj.edges, v.edge(i)->vertex_id(), v.edge(i)->get_data().up_weight);
  //dadj.ratings = zeros(N);
  dadj.vid = v.id();

  // Validate the slot *before* writing to it; checking afterwards (as the
  // code used to) cannot prevent the out-of-range store.
  assert(v.id() - pivot_st < adjs.size());
  adjs[v.id() - pivot_st] = dadj;

  __sync_add_and_fetch(&grabbed_edges, num_edges /*edges_to_larger_id*/);
  return num_edges;
}
/**
 * Vertex update function.
 *
 * Replaces the vertex's vector by a blend of its current value and the
 * co-occurrence-weighted average of its neighbors' vectors:
 *     pvec = alpha * pvec + (1 - alpha) * weighted_average.
 * Seed vertices and vertices without edges are left unchanged.
 */
void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
  vertex_data & self = latent_factors_inmem[vertex.id()];

  // Nothing to do for isolated vertices and for seeds.
  if (vertex.num_edges() == 0 || self.seed)
    return;

  vec weighted_sum = zeros(D);
  double total_weight = 0;

  for (int k = 0; k < vertex.num_edges(); ++k) {
    edge_data link = vertex.edge(k)->get_data();
    vertex_data & other = latent_factors_inmem[vertex.edge(k)->vertex_id()];
    weighted_sum += link.cooccurence_count * other.pvec;
    total_weight += link.cooccurence_count;
  }

  weighted_sum /= total_weight;
  self.pvec = alpha * self.pvec + (1-alpha)*weighted_sum;
}
/**
 * Grab pivot's adjacency list into memory.
 *
 * For a pivot vertex, sorts its edges, collects the distinct neighbor ids
 * that are larger than the vertex's own id into a freshly allocated array,
 * stores the result in `adjs` at slot (v.id() - pivot_st) and atomically
 * adds the number of stored ids to `grabbed_edges`.
 *
 * @param v  candidate vertex
 * @return   number of neighbor ids stored, or 0 when v is not a pivot
 */
int grab_adj(graphchi_vertex<uint32_t, uint32_t> &v) {
  if(is_pivot(v.id())) {
    int ncount = v.num_edges();
    // Sorting makes duplicates adjacent so they can be skipped with lastvid.
    v.sort_edges_indirect();

    // First pass: count distinct neighbors that have a larger id than v.
    int actcount = 0;
    vid_t lastvid = 0;
    for(int i=0; i<ncount; i++) {
      if (v.edge(i)->vertexid > v.id() && v.edge(i)->vertexid != lastvid)
        actcount++; // Need to store only ids larger than me
      lastvid = v.edge(i)->vertex_id();
    }

    // Allocate the in-memory adjacency list, using the
    // knowledge of the number of edges. calloc takes (count, size).
    dense_adj dadj = dense_adj(actcount, (vid_t*) calloc(actcount, sizeof(vid_t)));

    // Second pass: fill the list with the same filter as above.
    actcount = 0;
    lastvid = 0;
    for(int i=0; i<ncount; i++) {
      if (v.edge(i)->vertexid > v.id() && v.edge(i)->vertexid != lastvid) {
        // Need to store only ids larger than me
        dadj.adjlist[actcount++] = v.edge(i)->vertex_id();
      }
      lastvid = v.edge(i)->vertex_id();
    }
    assert(dadj.count == actcount);

    // Validate the slot *before* writing to it; checking afterwards (as the
    // code used to) cannot prevent the out-of-range store.
    assert(v.id() - pivot_st < adjs.size());
    adjs[v.id() - pivot_st] = dadj;

    __sync_add_and_fetch(&grabbed_edges, actcount);
    return actcount;
  }
  return 0;
}
/** * Vertex update function - computes the least square step */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { if (gcontext.iteration == 0){ if (vertex.num_outedges() == 0 && vertex.id() < M) logstream(LOG_FATAL)<<"NMF algorithm can not work when the row " << vertex.id() << " of the matrix contains all zeros" << std::endl; for(int e=0; e < vertex.num_edges(); e++) { float observation = vertex.edge(e)->get_data(); if (observation < 0 ){ logstream(LOG_FATAL)<<"Found a negative entry in matirx row " << vertex.id() << " with value: " << observation << std::endl; } } return; } bool isuser = (vertex.id() < M); if ((iter % 2 == 1 && !isuser) || (iter % 2 == 0 && isuser)) return; vec ret = zeros(D); vertex_data & vdata = latent_factors_inmem[vertex.id()]; for(int e=0; e < vertex.num_edges(); e++) { float observation = vertex.edge(e)->get_data(); vertex_data & nbr_latent = latent_factors_inmem[vertex.edge(e)->vertex_id()]; double prediction; rmse_vec[omp_get_thread_num()] += nmf_predict(vdata, nbr_latent, observation, prediction); if (prediction == 0) logstream(LOG_FATAL)<<"Got into numerical error! Please submit a bug report." << std::endl; ret += nbr_latent.pvec * (observation / prediction); } vec px; if (isuser) px = sum_of_item_latent_features; else px = sum_of_user_latent_feautres; for (int i=0; i<D; i++){ assert(px[i] != 0); vdata.pvec[i] *= ret[i] / px[i]; if (vdata.pvec[i] < epsilon) vdata.pvec[i] = epsilon; } }
/**
 * Two-pass writer that emits the graph's adjacency lists to fp_metis.
 *
 * Iteration 0: adds the number of distinct neighbors of every vertex that
 * has edges to `num_edges`.
 * Iteration 1: vertex 0 first writes the header line
 * "<num_vertices> <num_edges/2>"; every vertex then writes one line with
 * its distinct neighbor ids shifted by +1 (an empty line when it has no
 * edges).
 */
void update(graphchi_vertex<VertexDataType, EdgeDataType> &v, graphchi_context &ginfo) {
  const bool has_edges = v.num_edges() > 0;

  if (ginfo.iteration == 0) {
    if (has_edges) {
      // Deduplicate neighbors through the shared set before counting.
      nbs.clear();
      for (int k = 0; k < v.num_edges(); ++k) {
        nbs.insert(v.edge(k)->vertex_id());
      }
      num_edges += nbs.size();
    }
    return;
  }

  if (ginfo.iteration != 1)
    return;

  // Header line, written once by vertex 0.
  if (v.id() == 0) {
    fprintf(fp_metis, "%u %u\n", num_vertices, num_edges/2);
  }

  if (has_edges) {
    nbs.clear();
    for (int k = 0; k < v.num_edges(); ++k) {
      nbs.insert(v.edge(k)->vertex_id());
    }
    // Neighbor ids are written 1-based, in the set's ascending order.
    for (std::set<vid_t>::iterator pos = nbs.begin(); pos != nbs.end(); pos++) {
      fprintf(fp_metis, "%u ", (*pos)+1);
    }
  }
  // Every vertex terminates its (possibly empty) line.
  fprintf(fp_metis, "\n");
}
/** * Vertex update function. * On first iteration ,each vertex chooses a label = the vertex id. * On subsequent iterations, each vertex chooses the minimum of the neighbor's * label (and itself). */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { /* This program requires selective scheduling. */ assert(gcontext.scheduler != NULL); if(gcontext.iteration == 0) { set_data(vertex, vertex.id()); /* Schedule neighbor for update */ gcontext.scheduler->add_task(vertex.id()); return; } else { vid_t curmin = vertex_values[vertex.id()]; for(int i=0; i < vertex.num_edges(); i++) { vid_t nblabel = neighbor_value(vertex.edge(i)); curmin = std::min(nblabel, curmin); } if ( curmin < vertex.get_data() ) { for(int i=0; i < vertex.num_edges(); i++) { if (curmin < neighbor_value(vertex.edge(i))) { /* Schedule neighbor for update */ gcontext.scheduler->add_task(vertex.edge(i)->vertex_id()); } } set_data(vertex, curmin); } } /* On subsequent iterations, find the minimum label of my neighbors */ /* If my label changes, schedule neighbors */ }
void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { if (gcontext.iteration == 0){ //Vertexinfo vdata = vertex.get_data(); //id_th = vertex.id(); vid_t row = vertex.id() / nshards; //vid_t new_id = row * nshards + prefix_sum[vertex.id() % nshards]; vid_t new_id = row + prefix_sum[vertex.id() % nshards]; vertex.set_data(new_id); //source vertex in each CC //vdata.level = 0; //vertex.set_data(vdata); //vid_t new_id = getNewId(vdata.ccid, vdata.level); for(int i=0; i<vertex.num_edges(); i++){ bidirectional_label edata = vertex.edge(i)->get_data(); edata.my_label(vertex.id(), vertex.edge(i)->vertex_id()) = new_id; vertex.edge(i)->set_data(edata); } lock.lock(); fprintf(vfout, "%u\t%u\n", new_id, vertex.id()); lock.unlock(); }else{ for(int i=0; i<vertex.num_outedges(); i++){ bidirectional_label edata = vertex.outedge(i)->get_data(); vid_t my_id = edata.my_label(vertex.id(), vertex.outedge(i)->vertex_id()); vid_t nb_id = edata.neighbor_label(vertex.id(), vertex.outedge(i)->vertex_id()); if(my_id == nb_id){ std::cout<<"my_id="<<vertex.id()<<"\tmy_label="<<my_id <<"\tnb_label="<<nb_id <<"\tnb_vid="<<vertex.outedge(i)->vertex_id()<<std::endl; assert(my_id != nb_id); } if(!flag_weight){ lock.lock(); fprintf(efout, "%u\t%u\n", my_id, nb_id); lock.unlock(); }else{ lock.lock(); fprintf(efout, "%u\t%u\t%.3f\n", my_id, nb_id, edata.weight); lock.unlock(); } } } }
/**
 * Vertex update function.
 *
 * Blends the vertex's vector with the normalized, edge-weighted sum of the
 * vectors reached through its out-edges and renormalizes the result so
 * that it sums to one. Seed vertices and vertices without out-edges are
 * left unchanged.
 */
void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
  vertex_data & self = latent_factors_inmem[vertex.id()];

  if (debug)
    logstream(LOG_DEBUG)<<"Entering node: " << vertex.id() << " seed? " << self.seed << " in vector: " << self.pvec << std::endl;

  // Seeds keep their value; vertices without out-edges have no input.
  if (self.seed || vertex.num_outedges() == 0)
    return;

  vec incoming = zeros(D);
  const int out_degree = vertex.num_outedges();
  for (int k = 0; k < out_degree; ++k) {
    float weight = vertex.edge(k)->get_data();
    assert(weight != 0);
    vertex_data & other = latent_factors_inmem[vertex.edge(k)->vertex_id()];
    incoming += weight * other.pvec;
  }

  // Normalize the incoming mass.
  assert(sum(incoming) != 0);
  incoming = incoming / sum(incoming);

  self.pvec = alpha * self.pvec + (1-alpha)*incoming;
  self.pvec/= sum(self.pvec);
}
/** * Vertex update function - computes the least square step */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { if (vertex.id() >= M) return; vertex_data & vdata = latent_factors_inmem[vertex.id()]; int howmany = N*knn_sample_percent; assert(howmany > 0 ); vec distances = vec::Zero(howmany); ivec indices = ivec(howmany); for (int i=0; i< howmany; i++){ indices[i]= -2; } std::vector<bool> curratings; curratings.resize(N); for(int e=0; e < vertex.num_edges(); e++) { //no need to calculate this rating since it is given in the training data reference curratings[vertex.edge(e)->vertex_id() - M] = true; } if (knn_sample_percent == 1.0){ for (uint i=M; i< M+N; i++){ if (curratings[i-M]) continue; vertex_data & other = latent_factors_inmem[i]; double dist; als_predict(vdata, other, 0, dist); indices[i-M] = i-M; distances[i-M] = dist; } } else for (int i=0; i<howmany; i++){ int random_other = ::randi(M, M+N-1); vertex_data & other = latent_factors_inmem[random_other]; double dist; als_predict(vdata, other, 0, dist); indices[i-M] = i-M; distances[i-M] = dist; } vec out_dist(num_ratings); ivec indices_sorted = reverse_sort_index2(distances, indices, out_dist, num_ratings); assert(indices_sorted.size() <= num_ratings); assert(out_dist.size() <= num_ratings); vdata.ids = indices_sorted; vdata.ratings = out_dist; if (debug) printf("Closest is: %d with distance %g\n", (int)vdata.ids[0], vdata.ratings[0]); if (vertex.id() % 1000 == 0) printf("Computing recommendaitons for user %d at time: %g\n", vertex.id()+1, mytimer.current_time()); }
/** * Compute size of the relevant intersection of v and a pivot */ int intersection_size(graphchi_vertex<uint32_t, uint32_t> &v, vid_t pivot, int start_i) { assert(is_pivot(pivot)); int count = 0; if (pivot > v.id()) { dense_adj &dadj = adjs[pivot - pivot_st]; int vc = v.num_edges(); /** * If the adjacency list sizes are not too different, use * 'merge'-type of operation to compute size intersection. */ if (dadj.count < 32 * (vc - start_i)) { // TODO: do real profiling to find best cutoff value // Do merge-style of check assert(v.edge(start_i)->vertex_id() == pivot); int i1 = 0; int i2 = start_i+1; int nedges = v.num_edges(); while (i1 < dadj.count && i2 < nedges) { vid_t dst = v.edge(i2)->vertexid; vid_t a = dadj.adjlist[i1]; if (a == dst) { /* Add one to edge between v and the match */ v.edge(i2)->set_data(v.edge(i2)->get_data() + 1); count++; i1++; i2++; } else { i1 += a < dst; i2 += a > dst; } } } else { /** * Otherwise, use linear/binary search. */ vid_t lastvid = 0; for(int i=start_i+1; i < vc; i++) { vid_t nb = v.edge(i)->vertexid; if (nb > pivot && nb != lastvid) { int match = findadj(dadj.adjlist, dadj.count, nb); count += match; if (match > 0) { /* Add one to edge between v and the match */ v.edge(i)->set_data(v.edge(i)->get_data() + 1); } } lastvid = nb; } } } return count; }
/** * compute validaton AP for a single user */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { if (user_nodes && vertex.id() >= M) return; else if (!user_nodes && vertex.id() < M) return; vertex_data & vdata = latent_factors_inmem[vertex.id()]; vec ratings = zeros(vertex.num_outedges()); vec real_vals = zeros(vertex.num_outedges()); if (ratings.size() > 0){ users_vec[omp_get_thread_num()]++; int j=0; int real_click_count = 0; for(int e=0; e < vertex.num_outedges(); e++) { const EdgeDataType & observation = vertex.edge(e)->get_data(); vertex_data & pdata = latent_factors_inmem[vertex.edge(e)->vertex_id()]; double prediction; (*pprediction_func)(vdata, pdata, observation, prediction, NULL); ratings[j] = prediction; real_vals[j] = observation; if (observation > 0) real_click_count++; j++; } int count = 0; double ap = 0; ivec pos = sort_index(ratings); for (int j=0; j< std::min(ap_number, (int)ratings.size()); j++){ if (real_vals[pos[ratings.size() - j - 1]] > 0) ap += (++count * 1.0/(j+1)); } if (real_click_count > 0 ) ap /= real_click_count; else ap = 0; sum_ap_vec[omp_get_thread_num()] += ap; } }
/** * Vertex update function. * On first iteration ,each vertex chooses a label = the vertex id. * On subsequent iterations, each vertex chooses the minimum of the neighbor's * label (and itself). */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { /* On subsequent iterations, find the minimum label of my neighbors */ if (!edge_count){ vid_t curmin = vertex_values[vertex.id()]; //first time, count the number of nodes which actually have edges if (gcontext.iteration == 0 && vertex.num_edges() > 0){ mymutex.lock(); actual_vertices++; mymutex.unlock(); } for(int i=0; i < vertex.num_edges(); i++) { vid_t nblabel = neighbor_value(vertex.edge(i)); curmin = std::min(nblabel, curmin); } //in case of a new min reschedule neighbors if (vertex_values[vertex.id()] > curmin) { changes++; set_data(vertex, curmin); for (int i=0; i< vertex.num_edges(); i++){ active_nodes[vertex.edge(i)->vertex_id()] = true; } } else active_nodes[vertex.id()] = false; } else { vid_t curmin = vertex_values[vertex.id()]; for(int i=0; i < vertex.num_edges(); i++) { vid_t nblabel = neighbor_value(vertex.edge(i)); curmin = std::min(nblabel, curmin); if (vertex.edge(i)->vertex_id() > vertex.id()){ mymutex.lock(); state[curmin]++; mymutex.unlock(); } } } }
/** * Vertex update function. */ void update(graphchi_vertex<VertexDataType, edge_data> &v, graphchi_context &gcontext) { if (debug) printf("Entered iteration %d with %d\n", gcontext.iteration, is_item(v.id()) ? (v.id() - M + 1): v.id()); /* Even iteration numbers: * 1) load a subset of users into memory (pivots) * 2) Find which subset of items is connected to the users */ if (gcontext.iteration % 2 == 0) { if (adjcontainer->is_pivot(v.id()) && is_user(v.id())) { adjcontainer->load_edges_into_memory(v); if (debug) printf("Loading pivot %d intro memory\n", v.id()); } } /* odd iteration number: * 1) For any item connected to a pivot item * compute itersection */ else { assert(is_item(v.id())); for (int i=0; i< v.num_edges(); i++) { if (!adjcontainer->is_pivot(v.edge(i)->vertex_id())) continue; if (debug) printf("comparing user pivot %d to item %d\n", v.edge(i)->vertex_id()+1 , v.id() - M + 1); adjcontainer->compute_ratings(v, v.edge(i)->vertex_id(), v.edge(i)->get_data().up_weight); item_pairs_compared++; if (item_pairs_compared % 1000000 == 0) Rcpp::Rcout<< std::setw(10) << mytimer.current_time() << ") " << std::setw(10) << item_pairs_compared << " pairs compared " << std::endl; } }//end of iteration % 2 == 1 }//end of update function
/**
 * Computes the validation error contribution of a single vertex, with
 * optional time nodes and time weighting.
 *
 * Depending on `user_nodes`, only vertices with id below M or only those
 * at or above M are processed. When `time_nodes` is set, the latent factor
 * stored at the edge's time index is handed to the prediction function.
 * When `time_weighting` is set, the error is multiplied by the edge's time
 * field. The result is added to the calling thread's slot of
 * validation_rmse_vec.
 */
void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
  // Skip vertices on the side that was not requested.
  if (user_nodes ? (vertex.id() >= M) : (vertex.id() < M))
    return;

  vertex_data & self = latent_factors_inmem[vertex.id()];

  for (int k = 0; k < vertex.num_outedges(); ++k) {
    double rating = vertex.edge(k)->get_data().weight;
    uint time = (uint)vertex.edge(k)->get_data().time;

    // Resolve the time factor only when time nodes are in use.
    vertex_data * time_factor = NULL;
    if (time_nodes) {
      assert(time >= time_nodes_offset && time < time_nodes_offset+K);
      time_factor = &latent_factors_inmem[time];
    }

    vertex_data & other = latent_factors_inmem[vertex.edge(k)->vertex_id()];

    double predicted;
    double err = (*pprediction_func)(self, other, rating, predicted, (void*)time_factor);
    // The error can never exceed the squared rating range.
    assert(err <= pow(maxval - minval, 2));

    if (time_weighting)
      err *= vertex.edge(k)->get_data().time;

    assert(validation_rmse_vec.size() > omp_get_thread_num());
    validation_rmse_vec[omp_get_thread_num()] += err;
  }
}
/**
 * Grab pivot's adjacency list into memory.
 *
 * Copies every edge of `v` into a dense_adj, stores it in `adjs` at the
 * pivot's slot (v.id() - pivot_st) and atomically adds the number of edges
 * to `grabbed_edges`. Vertices with fewer than min_allowed_intersection
 * edges are skipped.
 *
 * @param v  pivot vertex
 * @return   number of edges loaded, or 0 when the vertex was skipped
 */
int load_edges_into_memory(graphchi_vertex<VertexDataType, EdgeDataType> &v) {
  //assert(is_pivot(v.id()));
  //assert(is_item(v.id()));

  int num_edges = v.num_edges();

  //not enough user rated this item, we don't need to compare to it
  if (num_edges < min_allowed_intersection){
    if (debug)
      logstream(LOG_DEBUG)<<"Skipping since num edges: " << num_edges << std::endl;
    return 0;
  }

  // Copy all edges (neighbor id -> edge value) into the dense adjacency.
  dense_adj dadj;
  for(int i=0; i<num_edges; i++)
    set_new( dadj.edges, v.edge(i)->vertex_id(), v.edge(i)->get_data());

  // Validate the slot *before* writing to it; checking afterwards (as the
  // code used to) cannot prevent the out-of-range store.
  assert(v.id() - pivot_st < adjs.size());
  adjs[v.id() - pivot_st] = dadj;

  __sync_add_and_fetch(&grabbed_edges, num_edges /*edges_to_larger_id*/);
  return num_edges;
}
/**
 * Relabels vertices and writes the relabelled edge list to fp_list.
 *
 * Vertices without edges are ignored.
 * Iteration 0: obtains the new id from getNewId(), stores it as vertex data
 * and writes it into this vertex's side of every edge label.
 * Iteration 1: writes every out-edge as "<my_label>\t<neighbor_label>"
 * (plus the weight when flag_weight is set).
 */
void update(graphchi_vertex<VType, EType> &v, graphchi_context &ginfo) {
  if (v.num_edges() == 0)
    return;

  if (ginfo.iteration == 0) {
    vid_t new_label = getNewId(v.id());
    v.set_data(new_label);

    // Publish the new label on this vertex's side of every edge.
    const int degree = v.num_edges();
    for (int k = 0; k < degree; ++k) {
      graphchi_edge<EType> * link = v.edge(k);
      EType payload = link->get_data();
      payload.my_label(v.id(), link->vertex_id()) = new_label;
      link->set_data(payload);
    }
    return;
  }

  if (ginfo.iteration != 1)
    return;
  if (!(v.num_outedges() > 0))
    return;

  vid_t own_label = v.get_data();
  for (int k = 0; k < v.num_outedges(); ++k) {
    graphchi_edge<EType> * link = v.outedge(k);
    EType payload = link->get_data();
    vid_t other_label = payload.nb_label(v.id(), link->vertex_id());
    // Two endpoints must never share a label.
    assert(own_label != other_label);

    lock.lock();
    if (flag_weight) {
      fprintf(fp_list, "%u\t%u\t%.3f\n", own_label, other_label, payload.weight);
    } else {
      fprintf(fp_list, "%u\t%u\n", own_label, other_label);
    }
    lock.unlock();
  }
}