/**
 * Vertex update function.
 * Test driver for dynamically-sized edge data (chivector):
 * iteration 0 seeds every out-edge vector with [vertex.id()]; later
 * iterations verify that each in-edge vector holds the sequence
 * source_id, source_id+1, ... (one element appended per completed
 * iteration) and then append this iteration's value to out-edges.
 */
void update(graphchi_vertex<VertexDataType, EdgeDataType > &vertex, graphchi_context &gcontext) {
    if (gcontext.iteration == 0) {
        // First sweep: reset every out-edge vector to exactly [vertex.id()].
        for(int i=0; i < vertex.num_outedges(); i++) {
            chivector<vid_t> * evector = vertex.outedge(i)->get_vector();
            evector->clear();
            assert(evector->size() == 0);
            evector->add(vertex.id());
            assert(evector->size() == 1);
            assert(evector->get(0) == vertex.id());
        }
    } else {
        // Verify in-edge vectors: element j must equal source id + j.
        for(int i=0; i < vertex.num_inedges(); i++) {
            graphchi_edge<EdgeDataType> * edge = vertex.inedge(i);
            chivector<vid_t> * evector = edge->get_vector();
            // One element must have been appended per completed iteration.
            assert(evector->size() >= gcontext.iteration);
            for(int j=0; j < evector->size(); j++) {
                vid_t expected = edge->vertex_id() + j;
                vid_t has = evector->get(j);
                if (has != expected) {
                    std::cout << "Mismatch: " << has << " != " << expected << std::endl;
                }
                assert(has == expected);
            }
        }
        // Append this iteration's value to every out-edge vector.
        for(int i=0; i < vertex.num_outedges(); i++) {
            vertex.outedge(i)->get_vector()->add(vertex.id() + gcontext.iteration);
        }
    }
    vertex.set_data(gcontext.iteration + 1);
}
/**
 * Vertex update function.
 * Runs on iteration 0 only: for every out-edge whose two directional
 * labels agree and equal `root`, record the edge (src, dst) into fpout
 * under the global lock. Vertices not confirmed in both passes are
 * skipped. Output to fpout1 for non-matching edges is disabled.
 */
void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
    if(gcontext.iteration == 0){
        VertexDataType vertexdata = vertex.get_data();
        // Only vertices confirmed in both passes participate.
        if(!vertexdata.confirmed || !vertexdata.reconfirmed)
            return ;
        // Sanity bound on the in/out degree product (global limit).
        assert(vertex.num_inedges() * vertex.num_outedges() <= product);
        for(int i=0; i<vertex.num_outedges(); i++){
            bidirectional_label edgedata = vertex.outedge(i)->get_data();
            if(edgedata.is_equal()){
                /* if(edgedata.smaller_one != 0) std::cout<<edgedata.smaller_one<<" \t"<<edgedata.larger_one<<"\t root="<<root<<std::endl; */
                // Edge lies inside the component rooted at `root`: record it.
                if(root == edgedata.my_label(vertex.id(), vertex.outedge(i)->vertexid)){
                    lock.lock();
                    fprintf(fpout, "%u\t%u\n", vertex.id(), vertex.outedge(i)->vertexid);
                    lock.unlock();
                    continue;
                }
            }
            /* lock.lock(); fprintf(fpout1, "%u\t%u\n", vertex.id(), vertex.outedge(i)->vertexid); lock.unlock(); */
        }
    }
}
/** * Vertex update function. */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { //go over all samples (rows) if ( vertex.num_outedges() > 0){ assert(vertex.id() < M); vertex_data & row = latent_factors_inmem[vertex.id()]; assert(row.y == -1 || row.y == 1); if (debug) std::cout<<"Entered item " << vertex.id() << " y: " << row.y << std::endl; row.sigma = beta*beta; row.xT_mu = 0; //go over all features for(int e=0; e < vertex.num_outedges(); e++) { uint feature_id = vertex.edge(e)->vertex_id(); edge_data edge = vertex.edge(e)->get_data(); assert(sigma_ij[feature_id] > 0); assert(edge.x_ij == 1); /* compute equation (6) */ row.sigma += edge.x_ij * sigma_ij[feature_id]; /* compute the sum xT*w as needed in equations (7) and (8) */ row.xT_mu += edge.x_ij * mu_ij[feature_id]; } double prediction; double ret = ctr_predict(row, row, row.y, prediction); double predicted_target = prediction < 0 ? -1: 1; if ((predicted_target == -1 && row.y == 1) || (predicted_target == 1 && row.y == -1)) err_vec[omp_get_thread_num()] += 1.0; if (debug) std::cout<<"Prediction was: " << prediction << " real value: " << row.y << std::endl; liklihood_vec[omp_get_thread_num()] += ret; assert(row.sigma > 0); //go over all features for(int e=0; e < vertex.num_outedges(); e++) { edge_data edge = vertex.edge(e)->get_data(); uint feature_id = vertex.edge(e)->vertex_id(); assert(row.sigma > 0); double product = row.y * row.xT_mu / sqrt(row.sigma); mu_ij[feature_id] += (row.y * edge.x_ij * sigma_ij[feature_id] / sqrt(row.sigma)) * v(product); //if (debug) // std::cout<<"Added to edge: "<< vertex.edge(e)->vertex_id() << " product: " << product << " v(product): " << v(product) << " value: " <<(row.y * edge.x_ij * edge.sigma_ij * edge.sigma_ij / sqrt(row.sigma)) * v(product) << std::endl; double factor = 1.0 - (edge.x_ij * sigma_ij[feature_id] / row.sigma)*w(product); //if (debug) // std::cout<<"Added to edge: "<< vertex.edge(e)->vertex_id() << " product: " 
<< product << " w(product): " << w(product) << " factor: " << (1.0 - (edge.x_ij * edge.sigma_ij / row.sigma)*w(product)) << " sigma_ij " << edge.sigma_ij << " product: " << edge.sigma_ij * factor << std::endl; assert(factor > 0); sigma_ij[feature_id] *= factor; assert(sigma_ij[feature_id] > 0); } } }
/**
 * Vertex update function.
 * Forward phase of SCC detection: propagates the minimum "color"
 * (a vertex id) forward along out-edges until convergence. Vertices
 * with only in- or only out-edges are trimmed, since they cannot
 * belong to a non-trivial strongly connected component.
 */
void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
    if (first_iteration) {
        vertex.set_data(SCCinfo(vertex.id()));
    }
    // Already-confirmed SCC members are finished.
    if (vertex.get_data().confirmed) {
        return;
    }
    /* Vertices with only in or out edges cannot be part of a SCC (Trimming) */
    if (vertex.num_inedges() == 0 || vertex.num_outedges() == 0) {
        if (vertex.num_edges() > 0) {
            // TODO: check this logic!
            vertex.set_data(SCCinfo(vertex.id()));
        }
        vertex.remove_alledges();
        return;
    }
    remainingvertices = true;
    VertexDataType vertexdata = vertex.get_data();
    bool propagate = false;
    if (gcontext.iteration == 0) {
        // Start from this vertex's own id as its color.
        vertexdata = vertex.id();
        propagate = true;
        /* Clean up in-edges. This would be nicer in the messaging abstraction... */
        for(int i=0; i < vertex.num_inedges(); i++) {
            bidirectional_label edgedata = vertex.inedge(i)->get_data();
            edgedata.my_label(vertex.id(), vertex.inedge(i)->vertexid) = vertex.id();
            vertex.inedge(i)->set_data(edgedata);
        }
    } else {
        /* Loop over in-edges and choose minimum color */
        vid_t minid = vertexdata.color;
        for(int i=0; i < vertex.num_inedges(); i++) {
            minid = std::min(minid, vertex.inedge(i)->get_data().neighbor_label(vertex.id(), vertex.inedge(i)->vertexid));
        }
        // Only re-propagate when the color actually decreased.
        if (minid != vertexdata.color) {
            vertexdata.color = minid;
            propagate = true;
        }
    }
    vertex.set_data(vertexdata);
    // Push the (possibly new) color to out-neighbors and schedule them.
    if (propagate) {
        for(int i=0; i < vertex.num_outedges(); i++) {
            bidirectional_label edgedata = vertex.outedge(i)->get_data();
            edgedata.my_label(vertex.id(), vertex.outedge(i)->vertexid) = vertexdata.color;
            vertex.outedge(i)->set_data(edgedata);
            gcontext.scheduler->add_task(vertex.outedge(i)->vertexid, true);
        }
    }
}
/**
 * Vertex update function.
 * SVD++ gradient step for one user node: first builds the user's
 * implicit-feedback vector (normalized sum of rated items' y-weights),
 * then for every rating updates item and user factors and biases,
 * and finally applies the accumulated step to each rated item's
 * y-weight vector.
 */
void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
    if ( vertex.num_outedges() > 0){
        vertex_data & user = latent_factors_inmem[vertex.id()];
        // Reset the user's implicit-feedback accumulator to zero.
        memset(&user.weight[0], 0, sizeof(double)*D);
        for(int e=0; e < vertex.num_outedges(); e++) {
            vertex_data & movie = latent_factors_inmem[vertex.edge(e)->vertex_id()];
            user.weight += movie.weight;
        }
        // sqrt(|N(u)|)
        float usrNorm = double(1.0/sqrt(vertex.num_outedges()));
        //sqrt(|N(u)| * sum_j y_j
        user.weight *= usrNorm;
        vec step = zeros(D);
        // main algorithm, see Koren's paper, just below below equation (16)
        for(int e=0; e < vertex.num_outedges(); e++) {
            vertex_data & movie = latent_factors_inmem[vertex.edge(e)->vertex_id()];
            float observation = vertex.edge(e)->get_data();
            double estScore;
            rmse_vec[omp_get_thread_num()] += svdpp_predict(user, movie,observation, estScore);
            // e_ui = r_ui - \hat{r_ui}
            float err = observation - estScore;
            assert(!std::isnan(rmse_vec[omp_get_thread_num()]));
            // Snapshot both factor vectors before updating either side,
            // so each update uses the pre-step values.
            vec itmFctr = movie.pvec;
            vec usrFctr = user.pvec;
            //q_i = q_i + gamma2 *(e_ui*(p_u + sqrt(N(U))\sum_j y_j) - gamma7 *q_i)
            for (int j=0; j< D; j++)
                movie.pvec[j] += svdpp.itmFctrStep*(err*(usrFctr[j] + user.weight[j]) - svdpp.itmFctrReg*itmFctr[j]);
            //p_u = p_u + gamma2 *(e_ui*q_i -gamma7 *p_u)
            for (int j=0; j< D; j++)
                user.pvec[j] += svdpp.usrFctrStep*(err *itmFctr[j] - svdpp.usrFctrReg*usrFctr[j]);
            // Accumulate the y-weight step (applied after the loop).
            step += err*itmFctr;
            //b_i = b_i + gamma1*(e_ui - gmma6 * b_i)
            movie.bias += svdpp.itmBiasStep*(err-svdpp.itmBiasReg* movie.bias);
            //b_u = b_u + gamma1*(e_ui - gamma6 * b_u)
            user.bias += svdpp.usrBiasStep*(err-svdpp.usrBiasReg* user.bias);
        }
        step *= float(svdpp.itmFctr2Step*usrNorm);
        //gamma7
        double mult = svdpp.itmFctr2Step*svdpp.itmFctr2Reg;
        for(int e=0; e < vertex.num_edges(); e++) {
            vertex_data& movie = latent_factors_inmem[vertex.edge(e)->vertex_id()];
            //y_j = y_j + gamma2*sqrt|N(u)| * q_i - gamma7 * y_j
            movie.weight += step - mult * movie.weight;
        }
    }
}
/** * Vertex update function - computes the least square step */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { vertex_data & vdata = latent_factors_inmem[vertex.id()]; mat XtX = mat::Zero(D, D); vec Xty = vec::Zero(D); bool compute_rmse = (vertex.num_outedges() > 0); // Compute XtX and Xty (NOTE: unweighted) for(int e=0; e < vertex.num_edges(); e++) { float observation = vertex.edge(e)->get_data(); vertex_data & nbr_latent = latent_factors_inmem[vertex.edge(e)->vertex_id()]; Xty += nbr_latent.pvec * observation; XtX += nbr_latent.pvec * nbr_latent.pvec.transpose(); if (compute_rmse) { double prediction; rmse_vec[omp_get_thread_num()] += sparse_als_predict(vdata, nbr_latent, observation, prediction); } } double regularization = lambda; if (regnormal) lambda *= vertex.num_edges(); for(int i=0; i < D; i++) XtX(i,i) += regularization; bool isuser = vertex.id() < (uint)M; if (algorithm == SPARSE_BOTH_FACTORS || (algorithm == SPARSE_USR_FACTOR && isuser) || (algorithm == SPARSE_ITM_FACTOR && !isuser)){ double sparsity_level = 1.0; if (isuser) sparsity_level -= user_sparsity; else sparsity_level -= movie_sparsity; vdata.pvec = CoSaMP(XtX, Xty, (int)ceil(sparsity_level*(double)D), 10, 1e-4, D); } else vdata.pvec = XtX.selfadjointView<Eigen::Upper>().ldlt().solve(Xty); }
/** * Vertex update function. */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { //go over all user nodes if ( vertex.num_outedges() > 0){ vertex_data & user = latent_factors_inmem[vertex.id()]; //go over all ratings for(int e=0; e < vertex.num_edges(); e++) { float observation = vertex.edge(e)->get_data(); vertex_data & movie = latent_factors_inmem[vertex.edge(e)->vertex_id()]; double estScore; rmse_vec[omp_get_thread_num()] += sgd_predict(user, movie, observation, estScore); double err = observation - estScore; if (std::isnan(err) || std::isinf(err)) logstream(LOG_FATAL)<<"SGD got into numerical error. Please tune step size using --sgd_gamma and sgd_lambda" << std::endl; //NOTE: the following code is not thread safe, since potentially several //user nodes may updates this item gradient vector concurrently. However in practice it //did not matter in terms of accuracy on a multicore machine. //if you like to defend the code, you can define a global variable //mutex mymutex; // //and then do: mymutex.lock() movie.pvec += sgd_gamma*(err*user.pvec - sgd_lambda*movie.pvec); //and here add: mymutex.unlock(); user.pvec += sgd_gamma*(err*movie.pvec - sgd_lambda*user.pvec); } } }
/**
 * Vertex update function - computes the least square step.
 * Sparse-ALS variant operating on raw double arrays mapped into Eigen
 * vectors: builds the normal equations over all rating edges,
 * regularizes the diagonal, and solves with CoSaMP (sparse side) or a
 * dense LDLT solve. Per-vertex RMSE is reset and accumulated only for
 * user nodes (those with out-edges).
 */
void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
    vertex_data & vdata = latent_factors_inmem[vertex.id()];
    vdata.rmse = 0;
    mat XtX = mat::Zero(NLATENT, NLATENT);
    vec Xty = vec::Zero(NLATENT);
    bool compute_rmse = (vertex.num_outedges() > 0);
    // Compute XtX and Xty (NOTE: unweighted)
    for(int e=0; e < vertex.num_edges(); e++) {
        float observation = vertex.edge(e)->get_data();
        vertex_data & nbr_latent = latent_factors_inmem[vertex.edge(e)->vertex_id()];
        // View the neighbor's raw factor array as an Eigen vector (no copy).
        Map<vec> X(nbr_latent.pvec, NLATENT);
        Xty += X * observation;
        XtX += X * X.transpose();
        if (compute_rmse) {
            double prediction;
            vdata.rmse += sparse_als_predict(vdata, nbr_latent, observation, prediction);
        }
    }
    // L2 regularization on the diagonal.
    for(int i=0; i < NLATENT; i++)
        XtX(i,i) += (lambda); // * vertex.num_edges();
    bool isuser = vertex.id() < (uint)M;
    Map<vec> vdata_vec(vdata.pvec, NLATENT);
    if (algorithm == SPARSE_BOTH_FACTORS || (algorithm == SPARSE_USR_FACTOR && isuser) || (algorithm == SPARSE_ITM_FACTOR && !isuser)){
        // Keep only ceil((1 - sparsity) * NLATENT) nonzeros via CoSaMP.
        double sparsity_level = 1.0;
        if (isuser)
            sparsity_level -= user_sparsity;
        else
            sparsity_level -= movie_sparsity;
        vdata_vec = CoSaMP(XtX, Xty, ceil(sparsity_level*(double)NLATENT), 10, 1e-4, NLATENT);
    }
    else
        vdata_vec = XtX.selfadjointView<Eigen::Upper>().ldlt().solve(Xty);
}
/** * Vertex update function - computes the least square step */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { vertex_data & vdata = latent_factors_inmem[vertex.id()]; bool isuser = vertex.id() < M; mat XtX = mat::Zero(D, D); vec Xty = vec::Zero(D); bool compute_rmse = (vertex.num_outedges() > 0); // Compute XtX and Xty (NOTE: unweighted) for(int e=0; e < vertex.num_edges(); e++) { const edge_data & edge = vertex.edge(e)->get_data(); float observation = edge.weight; vertex_data & nbr_latent = latent_factors_inmem[vertex.edge(e)->vertex_id()]; Xty += nbr_latent.pvec * observation; XtX.triangularView<Eigen::Upper>() += nbr_latent.pvec * nbr_latent.pvec.transpose(); if (compute_rmse) { double prediction; rmse_vec[omp_get_thread_num()] += pmf_predict(vdata, nbr_latent, observation, prediction, (void*)&edge.avgprd); vertex.edge(e)->set_data(edge); } } double regularization = lambda; if (regnormal) lambda *= vertex.num_edges(); for(int i=0; i < D; i++) XtX(i,i) += regularization; // Solve the least squares problem with eigen using Cholesky decomposition mat iAi_; bool ret =inv((isuser? A_U : A_V) + alpha * XtX, iAi_); assert(ret); vec mui_ = iAi_*((isuser? (A_U*mu_U) : (A_V*mu_V)) + alpha * Xty); vdata.pvec = mvnrndex(mui_, iAi_, D, 0); assert(vdata.pvec.size() == D); }
/**
 * Vertex update function.
 * Time-weighted ALS least-squares step: each rating edge contributes
 * to the normal equations scaled by its time weight, the per-vertex
 * RMSE is likewise weighted, and the regularized system is solved by
 * LDLT. Only the upper triangle of XtX is accumulated; the
 * selfadjoint view supplies the rest at solve time.
 */
void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
    vertex_data & vdata = latent_factors_inmem[vertex.id()];
    vdata.rmse = 0;
    mat XtX = mat::Zero(NLATENT, NLATENT);
    vec Xty = vec::Zero(NLATENT);
    bool compute_rmse = (vertex.num_outedges() > 0);
    // Compute XtX and Xty (NOTE: unweighted)
    for(int e=0; e < vertex.num_edges(); e++) {
        const edge_data & edge = vertex.edge(e)->get_data();
        vertex_data & nbr_latent = latent_factors_inmem[vertex.edge(e)->vertex_id()];
        // View the neighbor's raw factor array as an Eigen vector (no copy).
        Map<vec> X(nbr_latent.pvec, NLATENT);
        // Weight both sides of the normal equations by edge.time.
        Xty += X * edge.weight * edge.time;
        XtX.triangularView<Eigen::Upper>() += X * X.transpose() * edge.time;
        if (compute_rmse) {
            double prediction;
            vdata.rmse += wals_predict(vdata, nbr_latent, edge.weight, prediction) * edge.time;
        }
    }
    // Diagonal
    for(int i=0; i < NLATENT; i++)
        XtX(i,i) += (lambda); // * vertex.num_edges();
    // Solve the least squares problem with eigen using Cholesky decomposition
    Map<vec> vdata_vec(vdata.pvec, NLATENT);
    vdata_vec = XtX.selfadjointView<Eigen::Upper>().ldlt().solve(Xty);
}
/**
 * Single pass over out-edges: an edge whose two directional labels
 * agree and equal `root` is written to fpout, every other edge to
 * fpout1. Both files receive lines of the form "<src>\t<dst>\n",
 * serialized under the global lock.
 */
void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
    // Sanity bound on the in/out degree product (global limit).
    assert(vertex.num_inedges() * vertex.num_outedges() <= product);
    for (int i = 0; i < vertex.num_outedges(); i++) {
        bidirectional_label labels = vertex.outedge(i)->get_data();
        vid_t nb = vertex.outedge(i)->vertexid;
        // The edge belongs to the root component iff both endpoint labels
        // agree and that common label is the root's.
        bool in_root_component =
            labels.is_equal() && root == labels.my_label(vertex.id(), nb);
        FILE * target = in_root_component ? fpout : fpout1;
        lock.lock();
        fprintf(target, "%u\t%u\n", vertex.id(), nb);
        lock.unlock();
    }
}
/**
 * Update function (legacy "Pagerank" header — this actually relabels
 * and exports a graph). Iteration 0 assigns each non-isolated vertex a
 * new id via getNewId() and stamps it on its side of every edge;
 * iteration 1 writes the relabeled edge list (optionally with weights)
 * to fp_list under the global lock.
 */
void update(graphchi_vertex<VType, EType> &v, graphchi_context &ginfo) {
    //array[v.id()]++;
    // Isolated vertices are skipped entirely.
    if(v.num_edges() == 0)
        return;
    if (ginfo.iteration == 0) {
        //int partid = getPId(v.id());
        vid_t newid = getNewId(v.id());
        v.set_data(newid);
        // Stamp the new id on this vertex's side of every incident edge.
        for(int i=0; i<v.num_edges(); i++){
            graphchi_edge<EType> * edge = v.edge(i);
            EType edata = edge->get_data();
            edata.my_label(v.id(), edge->vertex_id()) = newid;
            edge->set_data(edata);
        }
    } else if(ginfo.iteration == 1){
        /* if(v.id() == 0){ fprintf(fp_list, "%u %u\n", num_vertices, num_edges); } */
        if(v.num_outedges() > 0){
            vid_t mylabel = v.get_data();
            for(int i=0; i<v.num_outedges(); i++){
                graphchi_edge<EType> * edge = v.outedge(i);
                EType edata = edge->get_data();
                vid_t nblabel = edata.nb_label(v.id(), edge->vertex_id());
                //vid_t nb_id = edge->vertex_id();
                // Self-loops under the new labeling are not expected.
                assert(mylabel != nblabel);
                // Emit "<src>\t<dst>", plus "\t<weight>" when enabled.
                if(!flag_weight){
                    lock.lock();
                    fprintf(fp_list, "%u\t%u\n", mylabel, nblabel);
                    lock.unlock();
                }else{
                    lock.lock();
                    fprintf(fp_list, "%u\t%u\t%.3f\n", mylabel, nblabel, edata.weight);
                    lock.unlock();
                }
                //edge->set_data(edata);
            }
        }/*else{ fprintf(fp_list, "\n"); }*/
    }
}
/** * Pagerank update function. */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &v, graphchi_context &ginfo) { float sum=0; if (ginfo.iteration == 0) { /* On first iteration, initialize vertex and out-edges. The initialization is important, because on every run, GraphChi will modify the data in the edges on disk. */ for(int i=0; i < v.num_outedges(); i++) { graphchi_edge<float> * edge = v.outedge(i); edge->set_data(1.0 / v.num_outedges()); } v.set_data(RANDOMRESETPROB); } else { /* Compute the sum of neighbors' weighted pageranks by reading from the in-edges. */ for(int i=0; i < v.num_inedges(); i++) { float val = v.inedge(i)->get_data(); sum += val; } /* Compute my pagerank */ float pagerank = RANDOMRESETPROB + (1 - RANDOMRESETPROB) * sum; /* Write my pagerank divided by the number of out-edges to each of my out-edges. */ if (v.num_outedges() > 0) { float pagerankcont = pagerank / v.num_outedges(); for(int i=0; i < v.num_outedges(); i++) { graphchi_edge<float> * edge = v.outedge(i); edge->set_data(pagerankcont); } } /* Keep track of the progression of the computation. GraphChi engine writes a file filename.deltalog. */ ginfo.log_change(std::abs(pagerank - v.get_data())); /* Set my new pagerank as the vertex value */ v.set_data(pagerank); } }
/**
 * Vertex update function.
 * Validation pass: for each sample node, sums the feature means mu_ij
 * over its features, classifies by the sign of the sum, stores the raw
 * score, and counts errors against the validation targets.
 */
void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
    // Only sample nodes (those with out-edges) are validated.
    if (vertex.num_outedges() == 0)
        return;
    assert(vertex.id() < Me);
    assert(validation_targets[vertex.id()] == -1 || validation_targets[vertex.id()] == 1);
    // Linear score: sum of the feature means of this sample's features.
    double sum = 0;
    for (int e = 0; e < vertex.num_outedges(); e++) {
        uint feature = vertex.edge(e)->vertex_id();
        // Feature node ids start at M.
        assert(feature >= M);
        sum += mu_ij[feature];
    }
    // Class probabilities under the probit link (used for debug output).
    double p0 = phi(-1 * sum / sqrt(beta));
    double p1 = phi(1 * sum / sqrt(beta));
    // Classify by the sign of the score; keep the raw score around.
    double predict = sum > 0 ? 1 : -1;
    latent_factors_inmem[vertex.id()].predict = sum;
    if (predict != validation_targets[vertex.id()])
        err_vec[omp_get_thread_num()]++;
    if (debug)
        std::cout<<"node: " << vertex.id() << " sum is: " << sum << " p0: " << p0 << " p1: " << p1 << " target: " << validation_targets[vertex.id()] << std::endl;
}
/**
 * Vertex update function - one label-propagation step.
 * Non-seed nodes blend their current distribution with the weighted,
 * normalized average of their neighbors' distributions:
 *   p <- alpha * p + (1 - alpha) * normalize(sum_e w_e * p_nbr),
 * followed by a final renormalization of p.
 */
void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
    vertex_data & vdata = latent_factors_inmem[vertex.id()];
    if (debug)
        logstream(LOG_DEBUG)<<"Entering node: " << vertex.id() << " seed? " << vdata.seed << " in vector: " << vdata.pvec << std::endl;
    // Seeds are fixed points; nodes without out-edges have nothing to
    // average over.
    if (vdata.seed || vertex.num_outedges() == 0)
        return;
    // Weighted sum of neighbor distributions.
    vec neighborhood = zeros(D);
    for (int e = 0; e < vertex.num_outedges(); e++) {
        float w = vertex.edge(e)->get_data();
        assert(w != 0);
        vertex_data & nbr = latent_factors_inmem[vertex.edge(e)->vertex_id()];
        neighborhood += w * nbr.pvec;
    }
    // Normalize into a probability vector before blending.
    assert(sum(neighborhood) != 0);
    neighborhood = neighborhood / sum(neighborhood);
    vdata.pvec = alpha * vdata.pvec + (1 - alpha) * neighborhood;
    vdata.pvec /= sum(vdata.pvec);
}
/**
 * compute validation AP for a single user.
 * Scores every validation edge of the node with the configured
 * prediction function, ranks the predictions, and accumulates the
 * average precision at ap_number into this thread's slot of
 * sum_ap_vec.
 */
void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
    // Process only the requested side of the bipartite graph.
    if (user_nodes && vertex.id() >= M)
        return;
    else if (!user_nodes && vertex.id() < M)
        return;
    vertex_data & vdata = latent_factors_inmem[vertex.id()];
    vec ratings = zeros(vertex.num_outedges());
    vec real_vals = zeros(vertex.num_outedges());
    if (ratings.size() > 0){
        users_vec[omp_get_thread_num()]++;
        int j=0;
        int real_click_count = 0;
        // Score every edge; remember predictions and the ground truth.
        for(int e=0; e < vertex.num_outedges(); e++) {
            const EdgeDataType & observation = vertex.edge(e)->get_data();
            vertex_data & pdata = latent_factors_inmem[vertex.edge(e)->vertex_id()];
            double prediction;
            (*pprediction_func)(vdata, pdata, observation, prediction, NULL);
            ratings[j] = prediction;
            real_vals[j] = observation;
            if (observation > 0)
                real_click_count++;
            j++;
        }
        int count = 0;
        double ap = 0;
        // sort_index yields ascending order; walk it from the back to
        // visit predictions from highest to lowest.
        ivec pos = sort_index(ratings);
        for (int j=0; j< std::min(ap_number, (int)ratings.size()); j++){
            if (real_vals[pos[ratings.size() - j - 1]] > 0)
                ap += (++count * 1.0/(j+1));
        }
        // Normalize by the number of actual positives (0 if none).
        if (real_click_count > 0 )
            ap /= real_click_count;
        else ap = 0;
        sum_ap_vec[omp_get_thread_num()] += ap;
    }
}
/**
 * Vertex update function.
 * Backward phase of SCC detection: starting from each component
 * "leader" (vertex whose color equals its own id), the color is
 * propagated backwards along in-edges. A vertex reached by its own
 * color is a confirmed member of that SCC and drops its out-edges;
 * an unreached vertex resets to a singleton component.
 */
void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
    if (vertex.get_data().confirmed) {
        return;
    }
    VertexDataType vertexdata = vertex.get_data();
    bool propagate = false;
    if (gcontext.iteration == 0) {
        /* "Leader" of the SCC */
        if (vertexdata.color == vertex.id()) {
            propagate = true;
            vertex.remove_alloutedges();
        }
    } else {
        /* Loop over in-edges and see if there is a match */
        bool match = false;
        for(int i=0; i < vertex.num_outedges(); i++) {
            if (!vertex.outedge(i)->get_data().deleted()) {
                if (vertex.outedge(i)->get_data().neighbor_label(vertex.id(), vertex.outedge(i)->vertexid) == vertexdata.color) {
                    match = true;
                    break;
                }
            }
        }
        if (match) {
            // Reached by our own color: confirmed member of this SCC.
            propagate = true;
            vertex.remove_alloutedges();
            vertex.set_data(SCCinfo(vertexdata.color, true));
        } else {
            // Not reached: fall back to a singleton component.
            vertex.set_data(SCCinfo(vertex.id(), false));
        }
    }
    // Push the color backwards along live in-edges and schedule sources.
    if (propagate) {
        for(int i=0; i < vertex.num_inedges(); i++) {
            bidirectional_label edgedata = vertex.inedge(i)->get_data();
            if (!edgedata.deleted()) {
                edgedata.my_label(vertex.id(), vertex.inedge(i)->vertexid) = vertexdata.color;
                vertex.inedge(i)->set_data(edgedata);
                gcontext.scheduler->add_task(vertex.inedge(i)->vertexid, true);
            }
        }
    }
}
/**
 * Update the weighted edge chivector.
 * We first obtain the edge weight from the first element, sum them,
 * then update the second item with each edge's weight-proportional
 * share of `quota` (element 0 of each out-edge vector is a raw weight;
 * element 1 receives quota * weight / total_weight).
 */
void update_edge_data(graphchi_vertex<VertexDataType, EdgeDataType> &v, float quota, bool first){
    float sum = 0.0;
    // First pass: total weight over all out-edges.
    //if(first)
    for(int i=0; i < v.num_outedges(); i++) {
        graphchi_edge<EdgeDataType> * edge = v.outedge(i);
        if (edge != NULL) {
            chivector<float> * evector = edge->get_vector();
            //std::cout << evector->size() << std::endl;
            /*if (first) assert(evector->size() == 1); else assert(evector->size() == 2); assert(evector->size() == 2);*/
            std::cout << v.id() << " with data: " << evector->get(0) << std::endl;
            sum += evector->get(0);
            /*if (first){ evector->add(sum); assert(evector->size() == 2); }*/
        }
    }
    // Second pass: write each edge's proportional share into slot 1.
    // NOTE(review): if every weight is zero, sum == 0 and val divides by
    // zero — presumably the weights are positive; confirm with callers.
    for(int i=0; i < v.num_outedges(); i++) {
        graphchi_edge<EdgeDataType> * edge = v.outedge(i);
        if (edge != NULL) {
            chivector<float> * evector = edge->get_vector();
            // assert(evector->size() == 2);
            float val = quota * evector->get(0) / sum;
            //evector->set(1, val);
            // On the first call the vector may still hold one element;
            // grow it so index 1 exists before set().
            if(first && (evector->size() == 1))
                evector->add(val);
            evector->set(1, val);
            //std::cout << v.id() << " with data: " << evector->get(0) << std::endl;
        }
    }
}
/**
 * Update the weighted edge data.
 * Distributes `quota` uniformly across this vertex's out-edges: the
 * weight-proportional variant is commented out, so each edge currently
 * receives quota / num_outedges in its pagerank field.
 */
void update_edge_data(graphchi_vertex<VertexDataType, EdgeDataType> &v, float quota){
    float sum = 0.0;
    // First pass: count the out-edges (weight-based summing is disabled).
    for(int i=0; i < v.num_outedges(); i++) {
        graphchi_edge<EdgeDataType> * edge = v.outedge(i);
        //We store the weight value to the edge->weight field and then sum them
        /*if(first) edge->set_weight(edge->get_data());*/
        struct weightE eData = edge->get_data();
        //sum += eData.weight;
        sum ++;
        //if(!first)
        //    std::cout << v.id() << " with data: " << edge->get_data() << " with weight " << edge->get_weight() << std::endl;
    }
    // Second pass: give every out-edge an equal share of the quota.
    for(int i=0; i < v.num_outedges(); i++) {
        graphchi_edge<EdgeDataType> * edge = v.outedge(i);
        struct weightE eData = edge->get_data();
        //eData.pagerank = quota * eData.weight / sum;
        eData.pagerank = quota * 1.0 / sum;
        edge->set_data(eData);
        // Debug trace for one hard-coded vertex id.
        if (v.id() == 3845)
            std::cout << v.id() << " -> " << edge->vertex_id() << " with data: " << eData.pagerank << " with weight " << eData.weight << std::endl;
    }
}
/**
 * Scores all documents for the query. The first step in update().
 * Runs the ranking model over every out-edge (document) of the query
 * vertex and caches the score in the edge vector's header; the sorted
 * score map below is disabled debug output.
 */
void score_documents(graphchi_vertex<TypeVertex, FeatureEdge> &query, graphchi_context &ginfo) {
    // XXX
    // std::map<double, FeatureEdge> scores;
    for (int doc = 0; doc < query.num_outedges(); doc++) {
        FeatureEdge* fe = query.outedge(doc)->get_vector();
        // Score the document's feature vector and store it in the header.
        fe->header().score = model->score(fe->get_data());
        // query.outedge(doc)->set_vector(fe);
        // scores[fe.score] = fe;
    }
    // for (auto rit = scores.crbegin(); rit != scores.crend(); ++rit) {
    //     std::cout << "Score " << query.id()
    //               << ": " << rit->second.str() << std::endl;
    // }
}
/**
 * compute validation RMSE for a single user (or item, when user_nodes
 * is false): accumulates the squared prediction error of every rating
 * edge into this thread's slot of validation_rmse_vec.
 */
void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
    // Process only the requested side of the bipartite graph.
    bool wrong_side = user_nodes ? (vertex.id() >= M) : (vertex.id() < M);
    if (wrong_side)
        return;
    vertex_data & vdata = latent_factors_inmem[vertex.id()];
    for (int e = 0; e < vertex.num_outedges(); e++) {
        const EdgeDataType & observation = vertex.edge(e)->get_data();
        vertex_data & nbr_latent = latent_factors_inmem[vertex.edge(e)->vertex_id()];
        double prediction;
        double rmse = (*pprediction_func)(vdata, nbr_latent, observation, prediction, NULL);
        // assert(rmse <= pow(maxval - minval, 2)); <ice>
        assert(validation_rmse_vec.size() > omp_get_thread_num());
        validation_rmse_vec[omp_get_thread_num()] += rmse;
    }
}
/** * Vertex update function - computes the least square step */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { if (gcontext.iteration == 0){ if (vertex.num_outedges() == 0 && vertex.id() < M) logstream(LOG_FATAL)<<"NMF algorithm can not work when the row " << vertex.id() << " of the matrix contains all zeros" << std::endl; for(int e=0; e < vertex.num_edges(); e++) { float observation = vertex.edge(e)->get_data(); if (observation < 0 ){ logstream(LOG_FATAL)<<"Found a negative entry in matirx row " << vertex.id() << " with value: " << observation << std::endl; } } return; } bool isuser = (vertex.id() < M); if ((iter % 2 == 1 && !isuser) || (iter % 2 == 0 && isuser)) return; vec ret = zeros(D); vertex_data & vdata = latent_factors_inmem[vertex.id()]; for(int e=0; e < vertex.num_edges(); e++) { float observation = vertex.edge(e)->get_data(); vertex_data & nbr_latent = latent_factors_inmem[vertex.edge(e)->vertex_id()]; double prediction; rmse_vec[omp_get_thread_num()] += nmf_predict(vdata, nbr_latent, observation, prediction); if (prediction == 0) logstream(LOG_FATAL)<<"Got into numerical error! Please submit a bug report." << std::endl; ret += nbr_latent.pvec * (observation / prediction); } vec px; if (isuser) px = sum_of_item_latent_features; else px = sum_of_user_latent_feautres; for (int i=0; i<D; i++){ assert(px[i] != 0); vdata.pvec[i] *= ret[i] / px[i]; if (vdata.pvec[i] < epsilon) vdata.pvec[i] = epsilon; } }
/**
 * Vertex update function.
 * Test program for edge deletion: iteration 0 stamps each in-edge with
 * this vertex's id; later iterations verify the out-edge values written
 * by the sources, rewrite in-edges with id + iteration, and randomly
 * delete roughly a quarter of the in-edges while tracking counts so
 * discrepancies surface.
 */
void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
    int ninedges = 0;
    if (gcontext.iteration == 0) {
        for(int i=0; i < vertex.num_inedges(); i++) {
            vertex.inedge(i)->set_data(vertex.id());
            ninedges++;
        }
    } else {
        // Keep track of the number of edges to ensure that
        // deletion works fine.
        if (vertex.get_data() != vertex.num_inedges()) {
            logstream(LOG_ERROR) << "Discrepancy in edge counts: " << vertex.get_data() << " != " << vertex.num_inedges() << std::endl;
        }
        assert(vertex.get_data() == vertex.num_inedges());
        for(int i=0; i < vertex.num_outedges(); i++) {
            graphchi_edge<vid_t> * edge = vertex.outedge(i);
            vid_t outedgedata = edge->get_data();
            // Expected value written by the source last iteration; the
            // boolean term corrects for sources updated after this vertex
            // in the execution order (ids greater than ours).
            vid_t expected = edge->vertex_id() + gcontext.iteration - (edge->vertex_id() > vertex.id());
            if (!is_deleted_edge_value(edge->get_data())) {
                if (outedgedata != expected) {
                    logstream(LOG_ERROR) << outedgedata << " != " << expected << std::endl;
                    assert(false);
                }
            }
        }
        for(int i=0; i < vertex.num_inedges(); i++) {
            vertex.inedge(i)->set_data(vertex.id() + gcontext.iteration);
            // Randomly delete about a quarter of the in-edges; count the
            // survivors and the global deletion tally atomically.
            if (std::rand() % 4 == 1) {
                vertex.remove_inedge(i);
                __sync_add_and_fetch(&ndeleted, 1);
            } else {
                ninedges++;
            }
        }
    }
    if (gcontext.iteration == gcontext.num_iterations - 1) {
        vertex.set_data(gcontext.iteration + 1);
    } else {
        vertex.set_data(ninedges);
    }
}
/**
 * Vertex update function.
 * Baseline predictors: for GLOBAL_MEAN / USER_MEAN the user side
 * (nodes with out-edges) optionally computes its mean rating and then
 * accumulates prediction RMSE; for ITEM_MEAN the item side does the
 * same with the prediction arguments swapped.
 */
void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
    //go over all user nodes
    if ( vertex.num_outedges() > 0 && (algo == GLOBAL_MEAN || algo == USER_MEAN)){
        vertex_data & user = latent_factors_inmem[vertex.id()];
        //go over all ratings
        if (algo == USER_MEAN){
            // Mean of this user's ratings.
            for(int e=0; e < vertex.num_edges(); e++) {
                float observation = vertex.edge(e)->get_data();
                user.mean_rating += observation;
            }
            if (vertex.num_edges() > 0)
                user.mean_rating /= vertex.num_edges();
        }
        //go over all ratings
        for(int e=0; e < vertex.num_edges(); e++) {
            double prediction;
            float observation = vertex.edge(e)->get_data();
            vertex_data & movie = latent_factors_inmem[vertex.edge(e)->vertex_id()];
            rmse_vec[omp_get_thread_num()] += baseline_predict(user, movie, observation, prediction);
        }
    } else if (vertex.num_inedges() > 0 && algo == ITEM_MEAN){
        // Here the node is an item; `user` names this node's own data.
        vertex_data & user = latent_factors_inmem[vertex.id()];
        //go over all ratings
        for(int e=0; e < vertex.num_edges(); e++) {
            float observation = vertex.edge(e)->get_data();
            user.mean_rating += observation;
        }
        if (vertex.num_edges() > 0)
            user.mean_rating /= vertex.num_edges();
        for(int e=0; e < vertex.num_edges(); e++) {
            float observation = vertex.edge(e)->get_data();
            double prediction;
            vertex_data & movie = latent_factors_inmem[vertex.edge(e)->vertex_id()];
            // Arguments swapped: the item plays the "user" role here.
            rmse_vec[omp_get_thread_num()] += baseline_predict(movie, user, observation, prediction);
        }
    }
}
/**
 * Vertex relabeling and relabeled-edge emission.
 *
 * Iteration 0: compute a new vertex id from the shard layout, store it in
 * the vertex, publish it on every incident edge, and append the
 * (new_id, old_id) mapping to vfout.
 * Later iterations: write each out-edge under the new ids to efout,
 * including the edge weight when flag_weight is set.
 */
void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
    if (gcontext.iteration == 0){
        // New id = row index within the shard + prefix count of the shard
        // column — assumes ids are distributed round-robin over nshards
        // columns with per-column prefix sums; TODO confirm against loader.
        vid_t row = vertex.id() / nshards;
        vid_t new_id = row + prefix_sum[vertex.id() % nshards];
        vertex.set_data(new_id);
        // Publish this vertex's new id on its side of every incident edge.
        for(int i=0; i<vertex.num_edges(); i++){
            bidirectional_label edata = vertex.edge(i)->get_data();
            edata.my_label(vertex.id(), vertex.edge(i)->vertex_id()) = new_id;
            vertex.edge(i)->set_data(edata);
        }
        // Record the new->old mapping; the output file is shared by threads.
        lock.lock();
        fprintf(vfout, "%u\t%u\n", new_id, vertex.id());
        lock.unlock();
    }else{
        for(int i=0; i<vertex.num_outedges(); i++){
            bidirectional_label edata = vertex.outedge(i)->get_data();
            vid_t my_id = edata.my_label(vertex.id(), vertex.outedge(i)->vertex_id());
            vid_t nb_id = edata.neighbor_label(vertex.id(), vertex.outedge(i)->vertex_id());
            // Equal labels would produce a self-loop under the new numbering,
            // which indicates a relabeling bug — dump diagnostics and abort.
            if(my_id == nb_id){
                std::cout<<"my_id="<<vertex.id()<<"\tmy_label="<<my_id
                         <<"\tnb_label="<<nb_id
                         <<"\tnb_vid="<<vertex.outedge(i)->vertex_id()<<std::endl;
                assert(my_id != nb_id);
            }
            // Emit the relabeled edge, with weight if requested.
            if(!flag_weight){
                lock.lock();
                fprintf(efout, "%u\t%u\n", my_id, nb_id);
                lock.unlock();
            }else{
                lock.lock();
                fprintf(efout, "%u\t%u\t%.3f\n", my_id, nb_id, edata.weight);
                lock.unlock();
            }
        }
    }
}
/**
 * Compute validation RMSE contributions for a single node.
 *
 * Runs only on the side of the bipartite graph selected by user_nodes
 * (user rows have id < M). Each out-edge's observed weight is compared
 * against the model prediction; squared errors accumulate into the
 * per-thread slot of validation_rmse_vec.
 */
void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
    // Skip nodes on the wrong side of the bipartite graph for this pass.
    if (user_nodes ? vertex.id() >= M : vertex.id() < M)
        return;
    vertex_data & factors = latent_factors_inmem[vertex.id()];
    for (int idx = 0; idx < vertex.num_outedges(); idx++) {
        const double rating = vertex.edge(idx)->get_data().weight;
        const uint t = (uint)vertex.edge(idx)->get_data().time;
        // Optional temporal component: look up the time node when enabled.
        vertex_data * time_node = NULL;
        if (time_nodes) {
            assert(t >= time_nodes_offset && t < time_nodes_offset+K);
            time_node = &latent_factors_inmem[t];
        }
        vertex_data & nbr_factors = latent_factors_inmem[vertex.edge(idx)->vertex_id()];
        double prediction;
        double rmse = (*pprediction_func)(factors, nbr_factors, rating, prediction, (void*)time_node);
        assert(rmse <= pow(maxval - minval, 2));
        // Optionally weight the error by the edge's time field.
        if (time_weighting)
            rmse *= vertex.edge(idx)->get_data().time;
        assert(validation_rmse_vec.size() > omp_get_thread_num());
        validation_rmse_vec[omp_get_thread_num()] += rmse;
    }
}
/** The actual LambdaRank implementation. */ virtual void compute_gradients( graphchi_vertex<TypeVertex, FeatureEdge> &query, Gradient* umodel) { std::vector<double> lambdas(query.num_outedges()); std::vector<double> s_is(query.num_outedges()); /* First, we compute all the outputs... */ for (int i = 0; i < query.num_outedges(); i++) { s_is[i] = get_score(query.outedge(i)); // std::cout << "s[" << i << "] == " << s_is[i] << std::endl; } /* ...and the retrieval measure scores. */ opt.compute(query); /* Now, we compute the errors (lambdas). */ for (int i = 0; i < query.num_outedges() - 1; i++) { int rel_i = get_relevance(query.outedge(i)); for (int j = i + 1; j < query.num_outedges(); j++) { int rel_j = get_relevance(query.outedge(j)); if (rel_i != rel_j) { double S_ij = rel_i > rel_j ? 1 : -1; double lambda_ij = dC_per_ds_i(S_ij, s_is[i], s_is[j]) * fabs(opt.delta(query, i, j)); /* lambda_ij = -lambda_ji */ lambdas[i] += lambda_ij; lambdas[j] -= lambda_ij; } } } /* Finally, the model update. */ for (int i = 0; i < query.num_outedges(); i++) { // -lambdas[i], as C is a utility function in this case umodel->update(query.outedge(i)->get_vector()->get_data(), s_is[i], lambdas[i]); } }
/**
 * RBM (Restricted Boltzmann Machine) vertex update.
 *
 * Iteration 0: per movie, accumulate a histogram of observed rating bins.
 * Iteration 1: normalize the histograms into log-frequencies and randomly
 *              initialize the movie weight matrices.
 * Iterations >= 2: one contrastive-divergence (CD-1) step per user node,
 *              also accumulating training RMSE per thread.
 */
void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
    if (gcontext.iteration == 0) {
        // Init pass 1: count rating-bin occurrences for each rated movie.
        if (is_user(vertex.id()) && vertex.num_outedges() > 0) {
            vertex_data& user = latent_factors_inmem[vertex.id()];
            user.pvec = zeros(D*3);
            for(int e=0; e < vertex.num_outedges(); e++) {
                // NOTE(review): `mov` is copied by value; the increment below
                // presumably still reaches shared storage because rbm_movie
                // appears to wrap pointers into the vertex data — confirm.
                rbm_movie mov = latent_factors_inmem[vertex.edge(e)->vertex_id()];
                float observation = vertex.edge(e)->get_data();
                int r = (int)(observation/rbm_scaling);  // map rating to a discrete bin
                assert(r < rbm_bins);
                mov.bi[r]++;
            }
        }
        return;
    } else if (gcontext.iteration == 1) {
        // Init pass 2 (movie nodes): random weights + log of bin frequencies.
        if (vertex.num_inedges() > 0) {
            rbm_movie mov = latent_factors_inmem[vertex.id()];
            setRand2(mov.w, D*rbm_bins, 0.001);
            for(int r = 0; r < rbm_bins; ++r) {
                mov.bi[r] /= (double)vertex.num_inedges();
                mov.bi[r] = log(1E-9 + mov.bi[r]);  // epsilon avoids log(0)
                if (mov.bi[r] > 1000) {
                    assert(false);
                    Rcpp::Rcerr<<"Numerical overflow" <<std::endl;
                }
            }
        }
        return; //done with initialization
    }
    //go over all user nodes
    if (is_user(vertex.id()) && vertex.num_outedges()) {
        vertex_data & user = latent_factors_inmem[vertex.id()];
        user.pvec = zeros(3*D);
        rbm_user usr(user);
        vec v1 = zeros(vertex.num_outedges());
        // Positive phase: accumulate hidden activations from observed ratings.
        //go over all ratings
        for(int e=0; e < vertex.num_outedges(); e++) {
            float observation = vertex.edge(e)->get_data();
            rbm_movie mov = latent_factors_inmem[vertex.edge(e)->vertex_id()];
            int r = (int)(observation / rbm_scaling);
            assert(r < rbm_bins);
            for(int k=0; k < D; k++) {
                usr.h[k] += mov.w[D*r + k];
                assert(!std::isnan(usr.h[k]));
            }
        }
        // Sample binary hidden states h0 from the activation probabilities.
        for(int k=0; k < D; k++) {
            usr.h[k] = sigmoid(usr.h[k]);
            if (drand48() < usr.h[k])
                usr.h0[k] = 1;
            else
                usr.h0[k] = 0;
        }
        // Reconstruction: predict a visible rating bin per movie into v1.
        int i = 0;
        double prediction;
        for(int e=0; e < vertex.num_outedges(); e++) {
            rbm_movie mov = latent_factors_inmem[vertex.edge(e)->vertex_id()];
            float observation = vertex.edge(e)->get_data();
            predict1(usr, mov, observation, prediction);
            int vi = (int)(prediction / rbm_scaling);
            v1[i] = vi;
            i++;
        }
        // Negative phase: hidden activations driven by the reconstruction.
        i = 0;
        for(int e=0; e < vertex.num_outedges(); e++) {
            rbm_movie mov = latent_factors_inmem[vertex.edge(e)->vertex_id()];
            int r = (int)v1[i];
            for (int k=0; k< D; k++) {
                usr.h1[k] += mov.w[r*D+k];
            }
            i++;
        }
        // Sample binary hidden states h1 (overwrites activations in place).
        for (int k=0; k < D; k++) {
            usr.h1[k] = sigmoid(usr.h1[k]);
            if (drand48() < usr.h1[k])
                usr.h1[k] = 1;
            else
                usr.h1[k] = 0;
        }
        // Weight update (contrastive divergence) + training RMSE accumulation.
        i = 0;
        for(int e=0; e < vertex.num_outedges(); e++) {
            rbm_movie mov = latent_factors_inmem[vertex.edge(e)->vertex_id()];
            float observation = vertex.edge(e)->get_data();
            double prediction;
            rbm_predict(user, mov, observation, prediction, NULL);
            double pui = prediction / rbm_scaling;
            double rui = observation / rbm_scaling;
            rmse_vec[omp_get_thread_num()] += (pui - rui) * (pui - rui);
            //nn += 1.0;
            int vi0 = (int)(rui);   // data-driven visible bin
            int vi1 = (int)v1[i];   // reconstruction-driven visible bin
            for (int k = 0; k < D; k++) {
                mov.w[D*vi0+k] += rbm_alpha * (usr.h0[k] - rbm_beta * mov.w[vi0*D+k]);
                assert(!std::isnan(mov.w[D*vi0+k]));
                mov.w[D*vi1+k] -= rbm_alpha * (usr.h1[k] + rbm_beta * mov.w[vi1*D+k]);
                assert(!std::isnan(mov.w[D*vi1+k]));
            }
            i++;
        }
    }
}
/** * Vertex update function. */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { double objective = -0.5*sgd_lambda*latent_factors_inmem[vertex.id()].pvec.squaredNorm(); // go over all user nodes if (vertex.num_outedges() > 1) { // can't compute with CLiMF if we have only 1 out edge! vec & U = latent_factors_inmem[vertex.id()].pvec; int Ni = vertex.num_edges(); // precompute f_{ij} = <U_i,V_j> for j = 1..N_i std::vector<double> f(Ni); int num_relevant = 0; for (int j = 0; j < Ni; ++j) { if (is_relevant(vertex.edge(j))) { const vec & Vj = latent_factors_inmem[vertex.edge(j)->vertex_id()].pvec; f[j] = dot(U, Vj); ++num_relevant; } } if (num_relevant < 2) { return; // need at least 2 edges to compute updates with CLiMF! node_without_edges++; } // compute gradients vec dU = -sgd_lambda*U; for (int j = 0; j < Ni; ++j) { if (is_relevant(vertex.edge(j))) { vec & Vj = latent_factors_inmem[vertex.edge(j)->vertex_id()].pvec; vec dVj = g(-f[j])*ones(D) - sgd_lambda*Vj; for (int k = 0; k < Ni; ++k) { if (k != j && is_relevant(vertex.edge(k))) { dVj += dg(f[j]-f[k])*(1.0/(1.0-g(f[k]-f[j]))-1.0/(1.0-g(f[j]-f[k])))*U; } } Vj += sgd_gamma*dVj; // not thread-safe dU += g(-f[j])*Vj; for (int k = 0; k < Ni; ++k) { if (k != j && is_relevant(vertex.edge(k))) { const vec & Vk = latent_factors_inmem[vertex.edge(k)->vertex_id()].pvec; dU += (Vj-Vk)*dg(f[k]-f[j])/(1.0-g(f[k]-f[j])); } } } } U += sgd_gamma*dU; // not thread-safe stat_vec[omp_get_thread_num()] += fabs(sgd_gamma*dU[0]); // compute smoothed MRR for(int j = 0; j < Ni; j++) { if (is_relevant(vertex.edge(j))) { objective += std::log(g(f[j])); for(int k = 0; k < Ni; k++) { if (is_relevant(vertex.edge(k))) { objective += std::log(1.0-g(f[k]-f[j])); } } } } } objective_vec[omp_get_thread_num()] += objective; }
/** * Vertex update function. */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { VertexDataType vertexdata; //= vertex.get_data(); //bool propagate = false; if (gcontext.iteration == 0) { //vertex.set_data(SCCinfo(vertex.id())); vertexdata = vertex.get_data(); /* vertices that is not visited in Fw phase is not in the giant SCC! * minor improve by mzj 2016/3/13 */ if(!vertexdata.confirmed) return; //assert(vertexdata.color == root); if(vertex.id() == root){ //vertexdata.confirmed = true; vertexdata.color = vertex.id(); vertexdata.reconfirmed = true; for(int i=0; i<vertex.num_inedges(); i++){ bidirectional_label edgedata = vertex.inedge(i)->get_data(); edgedata.my_label(vertex.id(), vertex.inedge(i)->vertexid) = vertex.id(); vertex.inedge(i)->set_data(edgedata); if(scheduler) gcontext.scheduler->add_task(vertex.inedge(i)->vertexid); vertex.inedge(i)->set_data(edgedata); } vertex.set_data(vertexdata); }else{ vertexdata.reconfirmed = false; vertexdata.color = vertex.id(); for(int i=0; i<vertex.num_inedges(); i++){ bidirectional_label edgedata = vertex.inedge(i)->get_data(); edgedata.my_label(vertex.id(), vertex.inedge(i)->vertexid) = vertex.id(); vertex.inedge(i)->set_data(edgedata); //if(scheduler) gcontext.scheduler->add_task(vertex.outedge(i)->vertexid); } vertex.set_data(vertexdata); } //vertex.set_data(vertexdata); } else { vertexdata = vertex.get_data(); if(!vertexdata.confirmed) return ; vid_t min_color = vertexdata.color; for(int i=0; i<vertex.num_outedges(); i++){ //min_color = std::min(min_color, vertexdata.inedge(i)->get_data().neighbor_label(vertex.id(), vertex.inedge(i)->vertexid)); if(root == (vertex.outedge(i)->get_data()).neighbor_label(vertex.id(), vertex.outedge(i)->vertexid)){ min_color = root; break; } } if(min_color != vertexdata.color){ converged = false; //vertexdata.confirmed = true; vertexdata.reconfirmed = true; vertexdata.color = min_color; for(int i=0; i<vertex.num_inedges(); i++){ 
bidirectional_label edgedata = vertex.inedge(i)->get_data(); edgedata.my_label(vertex.id(), vertex.inedge(i)->vertexid) = min_color; if(scheduler) gcontext.scheduler->add_task(vertex.inedge(i)->vertexid); vertex.inedge(i)->set_data(edgedata); } vertex.set_data(vertexdata); } } }