/** * Vertex update function - computes the least square step */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { vertex_data & vdata = latent_factors_inmem[vertex.id()]; mat XtX = mat::Zero(D, D); vec Xty = vec::Zero(D); bool compute_rmse = (vertex.num_outedges() > 0); // Compute XtX and Xty (NOTE: unweighted) for(int e=0; e < vertex.num_edges(); e++) { float observation = vertex.edge(e)->get_data(); vertex_data & nbr_latent = latent_factors_inmem[vertex.edge(e)->vertex_id()]; Xty += nbr_latent.pvec * observation; XtX += nbr_latent.pvec * nbr_latent.pvec.transpose(); if (compute_rmse) { double prediction; rmse_vec[omp_get_thread_num()] += sparse_als_predict(vdata, nbr_latent, observation, prediction); } } double regularization = lambda; if (regnormal) lambda *= vertex.num_edges(); for(int i=0; i < D; i++) XtX(i,i) += regularization; bool isuser = vertex.id() < (uint)M; if (algorithm == SPARSE_BOTH_FACTORS || (algorithm == SPARSE_USR_FACTOR && isuser) || (algorithm == SPARSE_ITM_FACTOR && !isuser)){ double sparsity_level = 1.0; if (isuser) sparsity_level -= user_sparsity; else sparsity_level -= movie_sparsity; vdata.pvec = CoSaMP(XtX, Xty, (int)ceil(sparsity_level*(double)D), 10, 1e-4, D); } else vdata.pvec = XtX.selfadjointView<Eigen::Upper>().ldlt().solve(Xty); }
/**
 * Grab pivot's adjacency list into memory.
 *
 * For a pivot vertex, stores in adjs[v.id() - pivot_st] a dense array of
 * the neighbor ids that are larger than v.id(), skipping an id that equals
 * the id of the immediately preceding edge. Edges are first reordered with
 * sort_edges_indirect() (presumably by neighbor id, so the "previous id"
 * check removes duplicates -- confirm against its definition).
 *
 * @return the number of neighbor ids stored, or 0 if v is not a pivot.
 */
int grab_adj(graphchi_vertex<uint32_t, uint32_t> &v) {
  if(is_pivot(v.id())) {
    int ncount = v.num_edges();
    // Count how many neighbors have larger id than v
    v.sort_edges_indirect();

    int actcount = 0;
    vid_t lastvid = 0;
    for(int i=0; i<ncount; i++) {
      if (v.edge(i)->vertexid > v.id() && v.edge(i)->vertexid != lastvid)
        actcount++; // Need to store only ids larger than me
      lastvid = v.edge(i)->vertex_id();
    }

    // Allocate the in-memory adjacency list, using the
    // knowledge of the number of edges.
    dense_adj dadj = dense_adj(actcount, (vid_t*) calloc(actcount, sizeof(vid_t)));
    actcount = 0;
    lastvid = 0;
    for(int i=0; i<ncount; i++) {
      if (v.edge(i)->vertexid > v.id() && v.edge(i)->vertexid != lastvid) {
        // Need to store only ids larger than me
        dadj.adjlist[actcount++] = v.edge(i)->vertex_id();
      }
      lastvid = v.edge(i)->vertex_id();
    }
    assert(dadj.count == actcount);

    // Validate the slot index BEFORE writing to it, so that an
    // out-of-range pivot is caught before memory is touched.
    assert(v.id() - pivot_st < adjs.size());
    adjs[v.id() - pivot_st] = dadj;
    __sync_add_and_fetch(&grabbed_edges, actcount);
    return actcount;
  }
  return 0;
}
/**
 * Output pass; does work on iteration 0 only.
 *
 * For a vertex whose data is flagged both `confirmed` and `reconfirmed`,
 * walks the out-edges and, for every edge whose label reports is_equal()
 * and whose label for this endpoint equals `root`, writes the line
 * "<source id>\t<target id>" to fpout while holding `lock`.
 */
void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
  if (gcontext.iteration != 0)
    return;

  VertexDataType vertexdata = vertex.get_data();
  if (!vertexdata.confirmed || !vertexdata.reconfirmed)
    return;

  assert(vertex.num_inedges() * vertex.num_outedges() <= product);

  for (int k = 0; k < vertex.num_outedges(); k++) {
    bidirectional_label label = vertex.outedge(k)->get_data();
    if (!label.is_equal())
      continue;
    /*
    if(label.smaller_one != 0)
      std::cout<<label.smaller_one<<" \t"<<label.larger_one<<"\t root="<<root<<std::endl;
    */
    if (root == label.my_label(vertex.id(), vertex.outedge(k)->vertexid)) {
      lock.lock();
      fprintf(fpout, "%u\t%u\n", vertex.id(), vertex.outedge(k)->vertexid);
      lock.unlock();
    }
    /*
    Disabled: write all remaining edges to a second file.
    lock.lock();
    fprintf(fpout1, "%u\t%u\n", vertex.id(), vertex.outedge(k)->vertexid);
    lock.unlock();
    */
  }
}
/** * Vertex update function - computes the least square step */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { vertex_data & vdata = latent_factors_inmem[vertex.id()]; vdata.rmse = 0; mat XtX = mat::Zero(NLATENT, NLATENT); vec Xty = vec::Zero(NLATENT); bool compute_rmse = is_user(vertex.id()); // Compute XtX and Xty (NOTE: unweighted) for(int e=0; e < vertex.num_edges(); e++) { float observation = vertex.edge(e)->get_data().weight; uint time = vertex.edge(e)->get_data().time; vertex_data & nbr_latent = latent_factors_inmem[vertex.edge(e)->vertex_id()]; vertex_data & time_node = latent_factors_inmem[time]; assert(time != vertex.id() && time != vertex.edge(e)->vertex_id()); Map<vec> X(nbr_latent.pvec, NLATENT); Map<vec> Y(time_node.pvec, NLATENT); vec XY = X.cwiseProduct(Y); Xty += XY * observation; XtX.triangularView<Eigen::Upper>() += XY * XY.transpose(); if (compute_rmse) { double prediction; vdata.rmse += als_tensor_predict(vdata, nbr_latent, time_node, observation, prediction); } } for(int i=0; i < NLATENT; i++) XtX(i,i) += (lambda); // * vertex.num_edges(); // Solve the least squares problem with eigen using Cholesky decomposition Map<vec> vdata_vec(vdata.pvec, NLATENT); vdata_vec = XtX.selfadjointView<Eigen::Upper>().ldlt().solve(Xty); }
/** * Vertex update function - computes the least square step */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { vertex_data & vdata = latent_factors_inmem[vertex.id()]; bool isuser = vertex.id() < M; mat XtX = mat::Zero(D, D); vec Xty = vec::Zero(D); bool compute_rmse = (vertex.num_outedges() > 0); // Compute XtX and Xty (NOTE: unweighted) for(int e=0; e < vertex.num_edges(); e++) { const edge_data & edge = vertex.edge(e)->get_data(); float observation = edge.weight; vertex_data & nbr_latent = latent_factors_inmem[vertex.edge(e)->vertex_id()]; Xty += nbr_latent.pvec * observation; XtX.triangularView<Eigen::Upper>() += nbr_latent.pvec * nbr_latent.pvec.transpose(); if (compute_rmse) { double prediction; rmse_vec[omp_get_thread_num()] += pmf_predict(vdata, nbr_latent, observation, prediction, (void*)&edge.avgprd); vertex.edge(e)->set_data(edge); } } double regularization = lambda; if (regnormal) lambda *= vertex.num_edges(); for(int i=0; i < D; i++) XtX(i,i) += regularization; // Solve the least squares problem with eigen using Cholesky decomposition mat iAi_; bool ret =inv((isuser? A_U : A_V) + alpha * XtX, iAi_); assert(ret); vec mui_ = iAi_*((isuser? (A_U*mu_U) : (A_V*mu_V)) + alpha * Xty); vdata.pvec = mvnrndex(mui_, iAi_, D, 0); assert(vdata.pvec.size() == D); }
/**
 * Vertex update function.
 *
 * Iteration 0: resets the vector on every out-edge to hold exactly one
 * element, this vertex's id.
 * Later iterations: checks that element j of every in-edge vector equals
 * (id of the edge's other endpoint + j), then appends
 * (own id + iteration) to every out-edge vector.
 * In all cases the vertex value is set to iteration + 1.
 */
void update(graphchi_vertex<VertexDataType, EdgeDataType > &vertex, graphchi_context &gcontext) {
  const bool first_round = (gcontext.iteration == 0);

  if (first_round) {
    for (int k = 0; k < vertex.num_outedges(); k++) {
      chivector<vid_t> * values = vertex.outedge(k)->get_vector();
      values->clear();
      assert(values->size() == 0);

      values->add(vertex.id());
      assert(values->size() == 1);
      assert(values->get(0) == vertex.id());
    }
  } else {
    // Verify what the in-neighbors have written so far
    for (int k = 0; k < vertex.num_inedges(); k++) {
      graphchi_edge<EdgeDataType> * in_edge = vertex.inedge(k);
      chivector<vid_t> * values = in_edge->get_vector();
      assert(values->size() >= gcontext.iteration);

      for (int pos = 0; pos < values->size(); pos++) {
        vid_t wanted = in_edge->vertex_id() + pos;
        vid_t found = values->get(pos);
        if (found != wanted) {
          std::cout << "Mismatch: " << found << " != " << wanted << std::endl;
        }
        assert(found == wanted);
      }
    }

    // Extend every out-edge vector by one element for this iteration
    for (int k = 0; k < vertex.num_outedges(); k++) {
      vertex.outedge(k)->get_vector()->add(vertex.id() + gcontext.iteration);
    }
  }

  vertex.set_data(gcontext.iteration + 1);
}
/** * Vertex update function - computes the least square step */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { vertex_data & vdata = latent_factors_inmem[vertex.id()]; vdata.rmse = 0; mat XtX = mat::Zero(NLATENT, NLATENT); vec Xty = vec::Zero(NLATENT); bool compute_rmse = (vertex.num_outedges() > 0); // Compute XtX and Xty (NOTE: unweighted) for(int e=0; e < vertex.num_edges(); e++) { float observation = vertex.edge(e)->get_data(); vertex_data & nbr_latent = latent_factors_inmem[vertex.edge(e)->vertex_id()]; Map<vec> X(nbr_latent.pvec, NLATENT); Xty += X * observation; XtX += X * X.transpose(); if (compute_rmse) { double prediction; vdata.rmse += sparse_als_predict(vdata, nbr_latent, observation, prediction); } } for(int i=0; i < NLATENT; i++) XtX(i,i) += (lambda); // * vertex.num_edges(); bool isuser = vertex.id() < (uint)M; Map<vec> vdata_vec(vdata.pvec, NLATENT); if (algorithm == SPARSE_BOTH_FACTORS || (algorithm == SPARSE_USR_FACTOR && isuser) || (algorithm == SPARSE_ITM_FACTOR && !isuser)){ double sparsity_level = 1.0; if (isuser) sparsity_level -= user_sparsity; else sparsity_level -= movie_sparsity; vdata_vec = CoSaMP(XtX, Xty, ceil(sparsity_level*(double)NLATENT), 10, 1e-4, NLATENT); } else vdata_vec = XtX.selfadjointView<Eigen::Upper>().ldlt().solve(Xty); }
/** * Vertex update function. * On first iteration ,each vertex chooses a label = the vertex id. * On subsequent iterations, each vertex chooses the minimum of the neighbor's * label (and itself). */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { /* On subsequent iterations, find the minimum label of my neighbors */ if (!edge_count){ vid_t curmin = vertex_values[vertex.id()]; if (gcontext.iteration == 0 && vertex.num_edges() > 0){ mymutex.lock(); actual_vertices++; mymutex.unlock(); } for(int i=0; i < vertex.num_edges(); i++) { vid_t nblabel = neighbor_value(vertex.edge(i)); curmin = std::min(nblabel, curmin); } if (vertex_values[vertex.id()] > curmin) { changes++; set_data(vertex, curmin); } } else { vid_t curmin = vertex_values[vertex.id()]; for(int i=0; i < vertex.num_edges(); i++) { vid_t nblabel = neighbor_value(vertex.edge(i)); curmin = std::min(nblabel, curmin); if (vertex.edge(i)->vertex_id() > vertex.id()){ mymutex.lock(); state[curmin]++; mymutex.unlock(); } } } }
/** * Vertex update function. */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { //go over all samples (rows) if ( vertex.num_outedges() > 0){ assert(vertex.id() < M); vertex_data & row = latent_factors_inmem[vertex.id()]; assert(row.y == -1 || row.y == 1); if (debug) std::cout<<"Entered item " << vertex.id() << " y: " << row.y << std::endl; row.sigma = beta*beta; row.xT_mu = 0; //go over all features for(int e=0; e < vertex.num_outedges(); e++) { uint feature_id = vertex.edge(e)->vertex_id(); edge_data edge = vertex.edge(e)->get_data(); assert(sigma_ij[feature_id] > 0); assert(edge.x_ij == 1); /* compute equation (6) */ row.sigma += edge.x_ij * sigma_ij[feature_id]; /* compute the sum xT*w as needed in equations (7) and (8) */ row.xT_mu += edge.x_ij * mu_ij[feature_id]; } double prediction; double ret = ctr_predict(row, row, row.y, prediction); double predicted_target = prediction < 0 ? -1: 1; if ((predicted_target == -1 && row.y == 1) || (predicted_target == 1 && row.y == -1)) err_vec[omp_get_thread_num()] += 1.0; if (debug) std::cout<<"Prediction was: " << prediction << " real value: " << row.y << std::endl; liklihood_vec[omp_get_thread_num()] += ret; assert(row.sigma > 0); //go over all features for(int e=0; e < vertex.num_outedges(); e++) { edge_data edge = vertex.edge(e)->get_data(); uint feature_id = vertex.edge(e)->vertex_id(); assert(row.sigma > 0); double product = row.y * row.xT_mu / sqrt(row.sigma); mu_ij[feature_id] += (row.y * edge.x_ij * sigma_ij[feature_id] / sqrt(row.sigma)) * v(product); //if (debug) // std::cout<<"Added to edge: "<< vertex.edge(e)->vertex_id() << " product: " << product << " v(product): " << v(product) << " value: " <<(row.y * edge.x_ij * edge.sigma_ij * edge.sigma_ij / sqrt(row.sigma)) * v(product) << std::endl; double factor = 1.0 - (edge.x_ij * sigma_ij[feature_id] / row.sigma)*w(product); //if (debug) // std::cout<<"Added to edge: "<< vertex.edge(e)->vertex_id() << " product: " 
<< product << " w(product): " << w(product) << " factor: " << (1.0 - (edge.x_ij * edge.sigma_ij / row.sigma)*w(product)) << " sigma_ij " << edge.sigma_ij << " product: " << edge.sigma_ij * factor << std::endl; assert(factor > 0); sigma_ij[feature_id] *= factor; assert(sigma_ij[feature_id] > 0); } } }
/** * Vertex update function - computes the least square step */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { if (vertex.id() >= M) return; vertex_data & vdata = latent_factors_inmem[vertex.id()]; int howmany = N*knn_sample_percent; assert(howmany > 0 ); vec distances = vec::Zero(howmany); ivec indices = ivec(howmany); for (int i=0; i< howmany; i++){ indices[i]= -2; } std::vector<bool> curratings; curratings.resize(N); for(int e=0; e < vertex.num_edges(); e++) { //no need to calculate this rating since it is given in the training data reference curratings[vertex.edge(e)->vertex_id() - M] = true; } if (knn_sample_percent == 1.0){ for (uint i=M; i< M+N; i++){ if (curratings[i-M]) continue; vertex_data & other = latent_factors_inmem[i]; double dist; als_predict(vdata, other, 0, dist); indices[i-M] = i-M; distances[i-M] = dist; } } else for (int i=0; i<howmany; i++){ int random_other = ::randi(M, M+N-1); vertex_data & other = latent_factors_inmem[random_other]; double dist; als_predict(vdata, other, 0, dist); indices[i-M] = i-M; distances[i-M] = dist; } vec out_dist(num_ratings); ivec indices_sorted = reverse_sort_index2(distances, indices, out_dist, num_ratings); assert(indices_sorted.size() <= num_ratings); assert(out_dist.size() <= num_ratings); vdata.ids = indices_sorted; vdata.ratings = out_dist; if (debug) printf("Closest is: %d with distance %g\n", (int)vdata.ids[0], vdata.ratings[0]); if (vertex.id() % 1000 == 0) printf("Computing recommendaitons for user %d at time: %g\n", vertex.id()+1, mytimer.current_time()); }
/** * Vertex update function. */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { if (vertex.get_data().confirmed) { return; } VertexDataType vertexdata = vertex.get_data(); bool propagate = false; if (gcontext.iteration == 0) { /* "Leader" of the SCC */ if (vertexdata.color == vertex.id()) { propagate = true; vertex.remove_alloutedges(); } } else { /* Loop over in-edges and see if there is a match */ bool match = false; for(int i=0; i < vertex.num_outedges(); i++) { if (!vertex.outedge(i)->get_data().deleted()) { if (vertex.outedge(i)->get_data().neighbor_label(vertex.id(), vertex.outedge(i)->vertexid) == vertexdata.color) { match = true; break; } } } if (match) { propagate = true; vertex.remove_alloutedges(); vertex.set_data(SCCinfo(vertexdata.color, true)); } else { vertex.set_data(SCCinfo(vertex.id(), false)); } } if (propagate) { for(int i=0; i < vertex.num_inedges(); i++) { bidirectional_label edgedata = vertex.inedge(i)->get_data(); if (!edgedata.deleted()) { edgedata.my_label(vertex.id(), vertex.inedge(i)->vertexid) = vertexdata.color; vertex.inedge(i)->set_data(edgedata); gcontext.scheduler->add_task(vertex.inedge(i)->vertexid, true); } } } }
/** * Vertex update function. */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { if (vertex.id() < (uint)mi.start || vertex.id() >= (uint)mi.end) return; vertex_data& user = latent_factors_inmem[vertex.id()]; bool rows = vertex.id() < (uint)info.get_start_node(false); if (info.is_square()) rows = mi.A_transpose; (void) rows; // unused assert(mi.r_offset >=0); //store previous value for convergence detection if (mi.prev_offset >= 0) user.pvec[mi.prev_offset ] = user.pvec[mi.r_offset]; double val = 0; assert(mi.x_offset >=0 || mi.y_offset>=0); /*** COMPUTE r = c*A*x ********/ if (mi.A_offset && mi.x_offset >= 0){ for(int e=0; e < vertex.num_edges(); e++) { const edge_data & edge = vertex.edge(e)->get_data(); const vertex_data & movie = latent_factors_inmem[vertex.edge(e)->vertex_id()]; val += (edge.weight * movie.pvec[mi.x_offset]); } if (info.is_square() && mi.use_diag)// add the diagonal term val += (/*mi.c**/ (user.A_ii+ regularization) * user.pvec[mi.x_offset]); val *= mi.c; } /***** COMPUTE r = c*I*x *****/ else if (!mi.A_offset && mi.x_offset >= 0){ val = mi.c*user.pvec[mi.x_offset]; } /**** COMPUTE r+= d*y (optional) ***/ if (mi.y_offset>= 0){ val += mi.d*user.pvec[mi.y_offset]; } /***** compute r = (... ) / div */ if (mi.div_offset >= 0){ val /= user.pvec[mi.div_offset]; } assert(mi.r_offset>=0 && mi.r_offset < user.pvec.size()); user.pvec[mi.r_offset] = val; } //end update
/** * Vertex update function. */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { //go over all user nodes if ( vertex.num_outedges() > 0){ vertex_data & user = latent_factors_inmem[vertex.id()]; //go over all ratings for(int e=0; e < vertex.num_edges(); e++) { float observation = vertex.edge(e)->get_data(); vertex_data & movie = latent_factors_inmem[vertex.edge(e)->vertex_id()]; double estScore; rmse_vec[omp_get_thread_num()] += sgd_predict(user, movie, observation, estScore); double err = observation - estScore; if (std::isnan(err) || std::isinf(err)) logstream(LOG_FATAL)<<"SGD got into numerical error. Please tune step size using --sgd_gamma and sgd_lambda" << std::endl; //NOTE: the following code is not thread safe, since potentially several //user nodes may updates this item gradient vector concurrently. However in practice it //did not matter in terms of accuracy on a multicore machine. //if you like to defend the code, you can define a global variable //mutex mymutex; // //and then do: mymutex.lock() movie.pvec += sgd_gamma*(err*user.pvec - sgd_lambda*movie.pvec); //and here add: mymutex.unlock(); user.pvec += sgd_gamma*(err*movie.pvec - sgd_lambda*user.pvec); } } }
/**
 * Vertex update function.
 *
 * Weighted least-squares step: every edge contributes its neighbor's
 * latent vector, scaled by edge.time, to the upper triangle of XtX, and
 * scaled by edge.weight * edge.time to Xty. lambda is added on the
 * diagonal and the system is solved with LDLT into this vertex's latent
 * vector. For vertices with out-edges, the value returned by wals_predict
 * times edge.time is summed into vdata.rmse.
 */
void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
  vertex_data & self = latent_factors_inmem[vertex.id()];
  self.rmse = 0;

  mat XtX = mat::Zero(NLATENT, NLATENT);
  vec Xty = vec::Zero(NLATENT);
  bool track_rmse = (vertex.num_outedges() > 0);

  // Accumulate the normal equations over all edges
  for (int k = 0; k < vertex.num_edges(); k++) {
    const edge_data & rating = vertex.edge(k)->get_data();
    vertex_data & neighbor = latent_factors_inmem[vertex.edge(k)->vertex_id()];
    Map<vec> nbr_vec(neighbor.pvec, NLATENT);

    Xty += nbr_vec * rating.weight * rating.time;
    XtX.triangularView<Eigen::Upper>() += nbr_vec * nbr_vec.transpose() * rating.time;

    if (track_rmse) {
      double prediction;
      self.rmse += wals_predict(self, neighbor, rating.weight, prediction) * rating.time;
    }
  }

  // Regularization on the diagonal (not scaled by the number of edges)
  for (int d = 0; d < NLATENT; d++)
    XtX(d, d) += (lambda);

  // Solve using the upper triangle only, writing straight into pvec
  Map<vec> solution(self.pvec, NLATENT);
  solution = XtX.selfadjointView<Eigen::Upper>().ldlt().solve(Xty);
}
/**
 * Grab pivot's adjacency list into memory.
 *
 * Copies, for a vertex that must be both a pivot and a user, the id of
 * every neighbor together with the edge's up_weight into a dense_adj
 * (via set_new on dadj.edges) and stores it in adjs[v.id() - pivot_st].
 * The global counter grabbed_edges is increased atomically by the number
 * of edges.
 *
 * @return the number of edges of v.
 */
int load_edges_into_memory(graphchi_vertex<uint32_t, edge_data> &v) {
  assert(is_pivot(v.id()));
  assert(is_user(v.id()));

  int num_edges = v.num_edges();

  dense_adj dadj;
  for(int i=0; i<num_edges; i++)
    set_new( dadj.edges, v.edge(i)->vertex_id(), v.edge(i)->get_data().up_weight);

  //dadj.ratings = zeros(N);
  dadj.vid = v.id();

  // Validate the slot index BEFORE writing to it, so that an out-of-range
  // pivot is caught before memory is touched.
  assert(v.id() - pivot_st < adjs.size());
  adjs[v.id() - pivot_st] = dadj;
  __sync_add_and_fetch(&grabbed_edges, num_edges /*edges_to_larger_id*/);
  return num_edges;
}
/**
 * Compute the validation error contribution of a single vertex.
 *
 * When user_nodes is set only vertices with id < M are processed,
 * otherwise only vertices with id >= M. For each out-edge, the prediction
 * function pointed to by pprediction_func is called and its return value
 * is added to the calling thread's slot of validation_rmse_vec.
 */
void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
  // skip vertices on the side that is not being evaluated
  if (user_nodes) {
    if (vertex.id() >= M)
      return;
  } else {
    if (vertex.id() < M)
      return;
  }

  vertex_data & self = latent_factors_inmem[vertex.id()];

  for (int k = 0; k < vertex.num_outedges(); k++) {
    const EdgeDataType & rating = vertex.edge(k)->get_data();
    vertex_data & neighbor = latent_factors_inmem[vertex.edge(k)->vertex_id()];

    double prediction;
    double err = (*pprediction_func)(self, neighbor, rating, prediction, NULL);
    // disabled check: assert(err <= pow(maxval - minval, 2));

    assert(validation_rmse_vec.size() > omp_get_thread_num());
    validation_rmse_vec[omp_get_thread_num()] += err;
  }
}
/** * Vertex update function - computes the least square step */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { if (gcontext.iteration == 0){ if (vertex.num_outedges() == 0 && vertex.id() < M) logstream(LOG_FATAL)<<"NMF algorithm can not work when the row " << vertex.id() << " of the matrix contains all zeros" << std::endl; for(int e=0; e < vertex.num_edges(); e++) { float observation = vertex.edge(e)->get_data(); if (observation < 0 ){ logstream(LOG_FATAL)<<"Found a negative entry in matirx row " << vertex.id() << " with value: " << observation << std::endl; } } return; } bool isuser = (vertex.id() < M); if ((iter % 2 == 1 && !isuser) || (iter % 2 == 0 && isuser)) return; vec ret = zeros(D); vertex_data & vdata = latent_factors_inmem[vertex.id()]; for(int e=0; e < vertex.num_edges(); e++) { float observation = vertex.edge(e)->get_data(); vertex_data & nbr_latent = latent_factors_inmem[vertex.edge(e)->vertex_id()]; double prediction; rmse_vec[omp_get_thread_num()] += nmf_predict(vdata, nbr_latent, observation, prediction); if (prediction == 0) logstream(LOG_FATAL)<<"Got into numerical error! Please submit a bug report." << std::endl; ret += nbr_latent.pvec * (observation / prediction); } vec px; if (isuser) px = sum_of_item_latent_features; else px = sum_of_user_latent_feautres; for (int i=0; i<D; i++){ assert(px[i] != 0); vdata.pvec[i] *= ret[i] / px[i]; if (vdata.pvec[i] < epsilon) vdata.pvec[i] = epsilon; } }
/** * Vertex update function. * On first iteration ,each vertex chooses a label = the vertex id. * On subsequent iterations, each vertex chooses the minimum of the neighbor's * label (and itself). */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { /* This program requires selective scheduling. */ assert(gcontext.scheduler != NULL); if(gcontext.iteration == 0) { set_data(vertex, vertex.id()); /* Schedule neighbor for update */ gcontext.scheduler->add_task(vertex.id()); return; } else { vid_t curmin = vertex_values[vertex.id()]; for(int i=0; i < vertex.num_edges(); i++) { vid_t nblabel = neighbor_value(vertex.edge(i)); curmin = std::min(nblabel, curmin); } if ( curmin < vertex.get_data() ) { for(int i=0; i < vertex.num_edges(); i++) { if (curmin < neighbor_value(vertex.edge(i))) { /* Schedule neighbor for update */ gcontext.scheduler->add_task(vertex.edge(i)->vertex_id()); } } set_data(vertex, curmin); } } /* On subsequent iterations, find the minimum label of my neighbors */ /* If my label changes, schedule neighbors */ }
/** * Vertex update function. */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { int ninedges = 0; if (gcontext.iteration == 0) { for(int i=0; i < vertex.num_inedges(); i++) { vertex.inedge(i)->set_data(vertex.id()); ninedges++; } } else { // Keep track of the number of edegs to ensure that // deletion works fine. if (vertex.get_data() != vertex.num_inedges()) { logstream(LOG_ERROR) << "Discrepancy in edge counts: " << vertex.get_data() << " != " << vertex.num_inedges() << std::endl; } assert(vertex.get_data() == vertex.num_inedges()); for(int i=0; i < vertex.num_outedges(); i++) { graphchi_edge<vid_t> * edge = vertex.outedge(i); vid_t outedgedata = edge->get_data(); vid_t expected = edge->vertex_id() + gcontext.iteration - (edge->vertex_id() > vertex.id()); if (!is_deleted_edge_value(edge->get_data())) { if (outedgedata != expected) { logstream(LOG_ERROR) << outedgedata << " != " << expected << std::endl; assert(false); } } } for(int i=0; i < vertex.num_inedges(); i++) { vertex.inedge(i)->set_data(vertex.id() + gcontext.iteration); if (std::rand() % 4 == 1) { vertex.remove_inedge(i); __sync_add_and_fetch(&ndeleted, 1); } else { ninedges++; } } } if (gcontext.iteration == gcontext.num_iterations - 1) { vertex.set_data(gcontext.iteration + 1); } else { vertex.set_data(ninedges); } }
/**
 * Writes every out-edge of the vertex as "<source id>\t<target id>" to one
 * of two files, holding `lock` around each write: fpout when the edge
 * label reports is_equal() and its label for this endpoint equals `root`,
 * fpout1 otherwise.
 */
void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
  assert(vertex.num_inedges() * vertex.num_outedges() <= product);

  for (int k = 0; k < vertex.num_outedges(); k++) {
    bidirectional_label label = vertex.outedge(k)->get_data();

    // short-circuit: my_label is only consulted for equal labels
    bool belongs_to_root = label.is_equal() &&
        root == label.my_label(vertex.id(), vertex.outedge(k)->vertexid);

    if (belongs_to_root) {
      lock.lock();
      fprintf(fpout, "%u\t%u\n", vertex.id(), vertex.outedge(k)->vertexid);
      lock.unlock();
    } else {
      lock.lock();
      fprintf(fpout1, "%u\t%u\n", vertex.id(), vertex.outedge(k)->vertexid);
      lock.unlock();
    }
  }
}
/**
 * Relabel-and-dump update function.
 *
 * Vertices without edges are ignored.
 * Iteration 0: the vertex obtains a new id from getNewId(), stores it as
 * its value and writes it into its own label slot on every edge.
 * Iteration 1: for every out-edge, prints "<my label>\t<neighbor label>"
 * to fp_list, followed by the edge weight ("\t%.3f") when flag_weight is
 * set. Each line is written while holding `lock`.
 */
void update(graphchi_vertex<VType, EType> &v, graphchi_context &ginfo) {
  if (v.num_edges() == 0)
    return;

  if (ginfo.iteration == 0) {
    vid_t newid = getNewId(v.id());
    v.set_data(newid);

    for (int k = 0; k < v.num_edges(); k++) {
      graphchi_edge<EType> * e = v.edge(k);
      EType labels = e->get_data();
      labels.my_label(v.id(), e->vertex_id()) = newid;
      e->set_data(labels);
    }
    return;
  }

  if (ginfo.iteration != 1)
    return;

  if (v.num_outedges() > 0) {
    vid_t mylabel = v.get_data();

    for (int k = 0; k < v.num_outedges(); k++) {
      graphchi_edge<EType> * e = v.outedge(k);
      EType labels = e->get_data();
      vid_t nblabel = labels.nb_label(v.id(), e->vertex_id());
      assert(mylabel != nblabel);

      if (flag_weight) {
        lock.lock();
        fprintf(fp_list, "%u\t%u\t%.3f\n", mylabel, nblabel, labels.weight);
        lock.unlock();
      } else {
        lock.lock();
        fprintf(fp_list, "%u\t%u\n", mylabel, nblabel);
        lock.unlock();
      }
    }
  }
}
/** * Vertex update function. */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { //go over all user nodes if ( vertex.num_outedges() > 0 && (algo == GLOBAL_MEAN || algo == USER_MEAN)){ vertex_data & user = latent_factors_inmem[vertex.id()]; //go over all ratings if (algo == USER_MEAN){ for(int e=0; e < vertex.num_edges(); e++) { float observation = vertex.edge(e)->get_data(); user.mean_rating += observation; } if (vertex.num_edges() > 0) user.mean_rating /= vertex.num_edges(); } //go over all ratings for(int e=0; e < vertex.num_edges(); e++) { double prediction; float observation = vertex.edge(e)->get_data(); vertex_data & movie = latent_factors_inmem[vertex.edge(e)->vertex_id()]; rmse_vec[omp_get_thread_num()] += baseline_predict(user, movie, observation, prediction); } } else if (vertex.num_inedges() > 0 && algo == ITEM_MEAN){ vertex_data & user = latent_factors_inmem[vertex.id()]; //go over all ratings for(int e=0; e < vertex.num_edges(); e++) { float observation = vertex.edge(e)->get_data(); user.mean_rating += observation; } if (vertex.num_edges() > 0) user.mean_rating /= vertex.num_edges(); for(int e=0; e < vertex.num_edges(); e++) { float observation = vertex.edge(e)->get_data(); double prediction; vertex_data & movie = latent_factors_inmem[vertex.edge(e)->vertex_id()]; rmse_vec[omp_get_thread_num()] += baseline_predict(movie, user, observation, prediction); } } }
/** * Vertex update function. */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { if ( vertex.num_outedges() > 0){ vertex_data & user = latent_factors_inmem[vertex.id()]; memset(&user.weight[0], 0, sizeof(double)*D); for(int e=0; e < vertex.num_outedges(); e++) { vertex_data & movie = latent_factors_inmem[vertex.edge(e)->vertex_id()]; user.weight += movie.weight; } // sqrt(|N(u)|) float usrNorm = double(1.0/sqrt(vertex.num_outedges())); //sqrt(|N(u)| * sum_j y_j user.weight *= usrNorm; vec step = zeros(D); // main algorithm, see Koren's paper, just below below equation (16) for(int e=0; e < vertex.num_outedges(); e++) { vertex_data & movie = latent_factors_inmem[vertex.edge(e)->vertex_id()]; float observation = vertex.edge(e)->get_data(); double estScore; rmse_vec[omp_get_thread_num()] += svdpp_predict(user, movie,observation, estScore); // e_ui = r_ui - \hat{r_ui} float err = observation - estScore; assert(!std::isnan(rmse_vec[omp_get_thread_num()])); vec itmFctr = movie.pvec; vec usrFctr = user.pvec; //q_i = q_i + gamma2 *(e_ui*(p_u + sqrt(N(U))\sum_j y_j) - gamma7 *q_i) for (int j=0; j< D; j++) movie.pvec[j] += svdpp.itmFctrStep*(err*(usrFctr[j] + user.weight[j]) - svdpp.itmFctrReg*itmFctr[j]); //p_u = p_u + gamma2 *(e_ui*q_i -gamma7 *p_u) for (int j=0; j< D; j++) user.pvec[j] += svdpp.usrFctrStep*(err *itmFctr[j] - svdpp.usrFctrReg*usrFctr[j]); step += err*itmFctr; //b_i = b_i + gamma1*(e_ui - gmma6 * b_i) movie.bias += svdpp.itmBiasStep*(err-svdpp.itmBiasReg* movie.bias); //b_u = b_u + gamma1*(e_ui - gamma6 * b_u) user.bias += svdpp.usrBiasStep*(err-svdpp.usrBiasReg* user.bias); } step *= float(svdpp.itmFctr2Step*usrNorm); //gamma7 double mult = svdpp.itmFctr2Step*svdpp.itmFctr2Reg; for(int e=0; e < vertex.num_edges(); e++) { vertex_data& movie = latent_factors_inmem[vertex.edge(e)->vertex_id()]; //y_j = y_j + gamma2*sqrt|N(u)| * q_i - gamma7 * y_j movie.weight += step - mult * movie.weight; } } }
/** * Compute size of the relevant intersection of v and a pivot */ int intersection_size(graphchi_vertex<uint32_t, uint32_t> &v, vid_t pivot, int start_i) { assert(is_pivot(pivot)); int count = 0; if (pivot > v.id()) { dense_adj &dadj = adjs[pivot - pivot_st]; int vc = v.num_edges(); /** * If the adjacency list sizes are not too different, use * 'merge'-type of operation to compute size intersection. */ if (dadj.count < 32 * (vc - start_i)) { // TODO: do real profiling to find best cutoff value // Do merge-style of check assert(v.edge(start_i)->vertex_id() == pivot); int i1 = 0; int i2 = start_i+1; int nedges = v.num_edges(); while (i1 < dadj.count && i2 < nedges) { vid_t dst = v.edge(i2)->vertexid; vid_t a = dadj.adjlist[i1]; if (a == dst) { /* Add one to edge between v and the match */ v.edge(i2)->set_data(v.edge(i2)->get_data() + 1); count++; i1++; i2++; } else { i1 += a < dst; i2 += a > dst; } } } else { /** * Otherwise, use linear/binary search. */ vid_t lastvid = 0; for(int i=start_i+1; i < vc; i++) { vid_t nb = v.edge(i)->vertexid; if (nb > pivot && nb != lastvid) { int match = findadj(dadj.adjlist, dadj.count, nb); count += match; if (match > 0) { /* Add one to edge between v and the match */ v.edge(i)->set_data(v.edge(i)->get_data() + 1); } } lastvid = nb; } } } return count; }
/**
 * Vertex update function.
 *
 * For a vertex with out-edges, sums mu_ij over all out-neighbors, stores
 * the sum as the vertex's `predict` value and compares its sign (+1 when
 * the sum is > 0, -1 otherwise) with validation_targets[vertex id]; a
 * mismatch increments the calling thread's slot of err_vec. The values
 * phi(-sum/sqrt(beta)) and phi(sum/sqrt(beta)) are only used for the
 * debug printout.
 */
void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
  if (vertex.num_outedges() <= 0)
    return;

  assert(vertex.id() < Me);
  assert(validation_targets[vertex.id()] == -1 || validation_targets[vertex.id()] == 1);

  double total = 0;
  for (int k = 0; k < vertex.num_outedges(); k++) {
    uint feature = vertex.edge(k)->vertex_id();
    assert(feature >= M);
    total += mu_ij[feature];
  }

  double p0 = phi(-1 * total / sqrt(beta));
  double p1 = phi(1 * total / sqrt(beta));

  double predicted_sign = -1;
  if (total > 0)
    predicted_sign = 1;

  latent_factors_inmem[vertex.id()].predict = total;

  if (predicted_sign != validation_targets[vertex.id()])
    err_vec[omp_get_thread_num()]++;

  if (debug)
    std::cout<<"node: " << vertex.id() << " sum is: " << total << " p0: " << p0 << " p1: " << p1 << " target: " << validation_targets[vertex.id()] << std::endl;
}
/**
 * Vertex update function - propagation step.
 *
 * Seed vertices and vertices without out-edges are left untouched. For any
 * other vertex, the weighted sum of the out-neighbors' vectors is
 * normalized to sum to one and blended with the vertex's current vector:
 *   pvec = alpha * pvec + (1 - alpha) * normalized_sum
 * after which pvec itself is normalized to sum to one.
 */
void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
  vertex_data & self = latent_factors_inmem[vertex.id()];

  if (debug)
    logstream(LOG_DEBUG)<<"Entering node: " << vertex.id() << " seed? " << self.seed << " in vector: " << self.pvec << std::endl;

  // a seed node keeps its vector; isolated nodes have nothing to gather
  if (self.seed || vertex.num_outedges() == 0)
    return;

  vec gathered = zeros(D);
  for (int k = 0; k < vertex.num_outedges(); k++) {
    float w = vertex.edge(k)->get_data();
    assert(w != 0);
    vertex_data & neighbor = latent_factors_inmem[vertex.edge(k)->vertex_id()];
    gathered += w * neighbor.pvec;
  }

  // normalize probabilities
  assert(sum(gathered) != 0);
  gathered = gathered / sum(gathered);

  self.pvec = alpha * self.pvec + (1-alpha)*gathered;
  self.pvec/= sum(self.pvec);
}
/**
 * Vertex update function operating on the in-memory array `pr`.
 *
 * - Iteration 0: a vertex with out-edges gets pr = 1 / out-degree; other
 *   vertices are not touched.
 * - Later iterations: pr becomes
 *   RANDOMRESETPROB + (1 - RANDOMRESETPROB) * (sum of pr over in-neighbors),
 *   divided by the out-degree when the vertex has out-edges.
 * - On the last iteration the stored pr is multiplied back by the out-degree
 *   (when positive) and written to the vertex as its data value.
 */
void update(graphchi_vertex<VertexDataType, EdgeDataType>& v, graphchi_context& ginfo) {
    const bool has_out_links = v.outc > 0;

    if (ginfo.iteration == 0) {
        // Initial value, already divided by the out-degree.
        if (has_out_links) {
            pr[v.id()] = 1.0f / v.outc;
        }
    } else if (ginfo.iteration > 0) {
        // Gather the (pre-divided) values of all in-neighbors.
        float sum = 0;
        const int in_degree = v.num_inedges();
        for (int k = 0; k < in_degree; k++) {
            sum += pr[v.inedge(k)->vertexid];
        }

        if (has_out_links) {
            // Store the value already divided by the out-degree so that
            // neighbors can add it up directly.
            pr[v.id()] = (RANDOMRESETPROB + (1 - RANDOMRESETPROB) * sum) / v.outc;
        } else {
            pr[v.id()] = (RANDOMRESETPROB + (1 - RANDOMRESETPROB) * sum);
        }
    }

    const bool is_last_iteration = (ginfo.iteration == ginfo.num_iterations - 1);
    if (is_last_iteration) {
        /* Undo the division by the out-degree and store the result */
        v.set_data(has_out_links ? pr[v.id()] * v.outc : pr[v.id()]);
    }
}
/** * Vertex update function. */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { if (first_iteration) { vertex.set_data(SCCinfo(vertex.id())); } if (vertex.get_data().confirmed) { return; } /* Vertices with only in or out edges cannot be part of a SCC (Trimming) */ if (vertex.num_inedges() == 0 || vertex.num_outedges() == 0) { if (vertex.num_edges() > 0) { // TODO: check this logic! vertex.set_data(SCCinfo(vertex.id())); } vertex.remove_alledges(); return; } remainingvertices = true; VertexDataType vertexdata = vertex.get_data(); bool propagate = false; if (gcontext.iteration == 0) { vertexdata = vertex.id(); propagate = true; /* Clean up in-edges. This would be nicer in the messaging abstraction... */ for(int i=0; i < vertex.num_inedges(); i++) { bidirectional_label edgedata = vertex.inedge(i)->get_data(); edgedata.my_label(vertex.id(), vertex.inedge(i)->vertexid) = vertex.id(); vertex.inedge(i)->set_data(edgedata); } } else { /* Loop over in-edges and choose minimum color */ vid_t minid = vertexdata.color; for(int i=0; i < vertex.num_inedges(); i++) { minid = std::min(minid, vertex.inedge(i)->get_data().neighbor_label(vertex.id(), vertex.inedge(i)->vertexid)); } if (minid != vertexdata.color) { vertexdata.color = minid; propagate = true; } } vertex.set_data(vertexdata); if (propagate) { for(int i=0; i < vertex.num_outedges(); i++) { bidirectional_label edgedata = vertex.outedge(i)->get_data(); edgedata.my_label(vertex.id(), vertex.outedge(i)->vertexid) = vertexdata.color; vertex.outedge(i)->set_data(edgedata); gcontext.scheduler->add_task(vertex.outedge(i)->vertexid, true); } } }
/** * Vertex update function. * On first iteration ,each vertex chooses a label = the vertex id. * On subsequent iterations, each vertex chooses the minimum of the neighbor's * label (and itself). */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { /* On subsequent iterations, find the minimum label of my neighbors */ if (!edge_count){ vid_t curmin = vertex_values[vertex.id()]; //first time, count the number of nodes which actually have edges if (gcontext.iteration == 0 && vertex.num_edges() > 0){ mymutex.lock(); actual_vertices++; mymutex.unlock(); } for(int i=0; i < vertex.num_edges(); i++) { vid_t nblabel = neighbor_value(vertex.edge(i)); curmin = std::min(nblabel, curmin); } //in case of a new min reschedule neighbors if (vertex_values[vertex.id()] > curmin) { changes++; set_data(vertex, curmin); for (int i=0; i< vertex.num_edges(); i++){ active_nodes[vertex.edge(i)->vertex_id()] = true; } } else active_nodes[vertex.id()] = false; } else { vid_t curmin = vertex_values[vertex.id()]; for(int i=0; i < vertex.num_edges(); i++) { vid_t nblabel = neighbor_value(vertex.edge(i)); curmin = std::min(nblabel, curmin); if (vertex.edge(i)->vertex_id() > vertex.id()){ mymutex.lock(); state[curmin]++; mymutex.unlock(); } } } }
/** * compute validaton AP for a single user */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { if (user_nodes && vertex.id() >= M) return; else if (!user_nodes && vertex.id() < M) return; vertex_data & vdata = latent_factors_inmem[vertex.id()]; vec ratings = zeros(vertex.num_outedges()); vec real_vals = zeros(vertex.num_outedges()); if (ratings.size() > 0){ users_vec[omp_get_thread_num()]++; int j=0; int real_click_count = 0; for(int e=0; e < vertex.num_outedges(); e++) { const EdgeDataType & observation = vertex.edge(e)->get_data(); vertex_data & pdata = latent_factors_inmem[vertex.edge(e)->vertex_id()]; double prediction; (*pprediction_func)(vdata, pdata, observation, prediction, NULL); ratings[j] = prediction; real_vals[j] = observation; if (observation > 0) real_click_count++; j++; } int count = 0; double ap = 0; ivec pos = sort_index(ratings); for (int j=0; j< std::min(ap_number, (int)ratings.size()); j++){ if (real_vals[pos[ratings.size() - j - 1]] > 0) ap += (++count * 1.0/(j+1)); } if (real_click_count > 0 ) ap /= real_click_count; else ap = 0; sum_ap_vec[omp_get_thread_num()] += ap; } }