/**
 * PageRank vertex update.
 *
 * Iteration 0 resets the out-edges through update_edge_data(v, 1.0) and stores
 * RANDOMRESETPROB as the vertex value. Every later iteration adds up the
 * `pagerank` field found on the in-edges, derives the new rank, hands it to
 * update_edge_data(), logs the absolute change and stores the new rank.
 */
void update(graphchi_vertex<VertexDataType, EdgeDataType> &v, graphchi_context &ginfo) {
    if (ginfo.iteration == 0) {
        /* Edge data lives on disk and is modified by every run, so it has to
           be re-initialized before the first real iteration. */
        update_edge_data(v, 1.0);
        v.set_data(RANDOMRESETPROB);
        return;
    }

    /* Accumulate what the in-neighbors left on their edges. */
    float incoming = 0;
    for (int e = 0; e < v.num_inedges(); ++e) {
        struct weightE payload = v.inedge(e)->get_data();
        incoming += payload.pagerank;
    }

    /* New rank for this vertex. */
    float pagerank = RANDOMRESETPROB + (1 - RANDOMRESETPROB) * incoming;

    /* Publish the new rank on the out-edges. */
    update_edge_data(v, pagerank);

    /* Report the change (the engine writes it to filename.deltalog),
       then commit the new value. */
    ginfo.log_change(std::abs(pagerank - v.get_data()));
    v.set_data(pagerank);
}
/**
 * Vertex update function exercising per-edge vectors.
 *
 * Iteration 0: every out-edge vector is cleared and seeded with this vertex's id.
 * Later iterations: every in-edge vector must hold at least `iteration`
 * entries, and entry j must equal (source id + j); afterwards
 * (own id + iteration) is appended to every out-edge vector.
 * The vertex value is always set to iteration + 1.
 */
void update(graphchi_vertex<VertexDataType, EdgeDataType > &vertex, graphchi_context &gcontext) {
    if (gcontext.iteration != 0) {
        // Validate what the in-neighbors have written so far.
        for (int in = 0; in < vertex.num_inedges(); ++in) {
            graphchi_edge<EdgeDataType> * inbound = vertex.inedge(in);
            chivector<vid_t> * values = inbound->get_vector();
            assert(values->size() >= gcontext.iteration);
            for (int pos = 0; pos < values->size(); ++pos) {
                vid_t expected = inbound->vertex_id() + pos;
                vid_t has = values->get(pos);
                if (has != expected) {
                    std::cout << "Mismatch: " << has << " != " << expected << std::endl;
                }
                assert(has == expected);
            }
        }
        // Extend each out-edge vector by one entry for this iteration.
        for (int out = 0; out < vertex.num_outedges(); ++out) {
            vertex.outedge(out)->get_vector()->add(vertex.id() + gcontext.iteration);
        }
    } else {
        // First pass: reset each out-edge vector to exactly [own id].
        for (int out = 0; out < vertex.num_outedges(); ++out) {
            chivector<vid_t> * values = vertex.outedge(out)->get_vector();
            values->clear();
            assert(values->size() == 0);
            values->add(vertex.id());
            assert(values->size() == 1);
            assert(values->get(0) == vertex.id());
        }
    }
    vertex.set_data(gcontext.iteration + 1);
}
/** * Vertex update function. * On first iteration ,each vertex chooses a label = the vertex id. * On subsequent iterations, each vertex chooses the minimum of the neighbor's * label (and itself). */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { /* On subsequent iterations, find the minimum label of my neighbors */ if (!edge_count){ vid_t curmin = vertex_values[vertex.id()]; if (gcontext.iteration == 0 && vertex.num_edges() > 0){ mymutex.lock(); actual_vertices++; mymutex.unlock(); } for(int i=0; i < vertex.num_edges(); i++) { vid_t nblabel = neighbor_value(vertex.edge(i)); curmin = std::min(nblabel, curmin); } if (vertex_values[vertex.id()] > curmin) { changes++; set_data(vertex, curmin); } } else { vid_t curmin = vertex_values[vertex.id()]; for(int i=0; i < vertex.num_edges(); i++) { vid_t nblabel = neighbor_value(vertex.edge(i)); curmin = std::min(nblabel, curmin); if (vertex.edge(i)->vertex_id() > vertex.id()){ mymutex.lock(); state[curmin]++; mymutex.unlock(); } } } }
/** * Vertex update function. */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { //go over all user nodes if ( vertex.num_outedges() > 0){ vertex_data & user = latent_factors_inmem[vertex.id()]; //go over all ratings for(int e=0; e < vertex.num_edges(); e++) { float observation = vertex.edge(e)->get_data(); vertex_data & movie = latent_factors_inmem[vertex.edge(e)->vertex_id()]; double estScore; rmse_vec[omp_get_thread_num()] += sgd_predict(user, movie, observation, estScore); double err = observation - estScore; if (std::isnan(err) || std::isinf(err)) logstream(LOG_FATAL)<<"SGD got into numerical error. Please tune step size using --sgd_gamma and sgd_lambda" << std::endl; //NOTE: the following code is not thread safe, since potentially several //user nodes may updates this item gradient vector concurrently. However in practice it //did not matter in terms of accuracy on a multicore machine. //if you like to defend the code, you can define a global variable //mutex mymutex; // //and then do: mymutex.lock() movie.pvec += sgd_gamma*(err*user.pvec - sgd_lambda*movie.pvec); //and here add: mymutex.unlock(); user.pvec += sgd_gamma*(err*movie.pvec - sgd_lambda*user.pvec); } } }
/**
 * Sanity-check update, active only on iteration 0.
 *
 * For a vertex that has edges and is both `confirmed` and `reconfirmed`,
 * counts the edges whose two labels are equal and whose label for this vertex
 * matches `root`, and asserts that more than one such edge exists.
 */
void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
    if (gcontext.iteration != 0)
        return;
    if (vertex.num_edges() == 0)
        return;

    VertexDataType vertexdata = vertex.get_data();
    if (!(vertexdata.confirmed && vertexdata.reconfirmed))
        return;
    //assert(vertex.num_inedges() * vertex.num_outedges() <= product);

    int ct = 0;
    for (int k = 0; k < vertex.num_edges(); ++k) {
        graphchi_edge<EdgeDataType>* edge = vertex.edge(k);
        bidirectional_label edgedata = edge->get_data();
        /*
        if(edgedata.smaller_one != 0)
            std::cout<<edgedata.smaller_one<<" \t"<<edgedata.larger_one<<"\t root="<<root<<std::endl;
        */
        if (edgedata.is_equal() && root == edgedata.my_label(vertex.id(), edge->vertexid)) {
            ct++;
        }
        /*
        lock.lock();
        fprintf(fpout1, "%u\t%u\n", vertex.id(), vertex.outedge(i)->vertexid);
        lock.unlock();
        */
    }
    assert(ct > 1);
}
/**
 * Vertex update function (least-squares step with per-edge `time` weights).
 *
 * Builds the normal equations from the neighbors' factor vectors, where each
 * edge contributes scaled by edge.time, adds `lambda` to the diagonal and
 * solves for this vertex's factor vector. For vertices with out-edges the
 * time-scaled prediction error is accumulated into vdata.rmse.
 */
void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
    vertex_data & vdata = latent_factors_inmem[vertex.id()];
    vdata.rmse = 0;

    bool compute_rmse = (vertex.num_outedges() > 0);
    mat XtX = mat::Zero(NLATENT, NLATENT);
    vec Xty = vec::Zero(NLATENT);

    // Accumulate XtX (upper triangle only) and Xty over all edges.
    for (int k = 0; k < vertex.num_edges(); ++k) {
        const edge_data & edge = vertex.edge(k)->get_data();
        vertex_data & nbr_latent = latent_factors_inmem[vertex.edge(k)->vertex_id()];
        Map<vec> X(nbr_latent.pvec, NLATENT);

        Xty += X * edge.weight * edge.time;
        XtX.triangularView<Eigen::Upper>() += X * X.transpose() * edge.time;

        if (compute_rmse) {
            double prediction;
            vdata.rmse += wals_predict(vdata, nbr_latent, edge.weight, prediction) * edge.time;
        }
    }

    // Regularize the diagonal (not scaled by the number of edges).
    for (int d = 0; d < NLATENT; ++d)
        XtX(d, d) += (lambda);

    // Solve through an LDLT factorization of the self-adjoint view.
    Map<vec> vdata_vec(vdata.pvec, NLATENT);
    vdata_vec = XtX.selfadjointView<Eigen::Upper>().ldlt().solve(Xty);
}
/** * Vertex update function. */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { //go over all samples (rows) if ( vertex.num_outedges() > 0){ assert(vertex.id() < M); vertex_data & row = latent_factors_inmem[vertex.id()]; assert(row.y == -1 || row.y == 1); if (debug) std::cout<<"Entered item " << vertex.id() << " y: " << row.y << std::endl; row.sigma = beta*beta; row.xT_mu = 0; //go over all features for(int e=0; e < vertex.num_outedges(); e++) { uint feature_id = vertex.edge(e)->vertex_id(); edge_data edge = vertex.edge(e)->get_data(); assert(sigma_ij[feature_id] > 0); assert(edge.x_ij == 1); /* compute equation (6) */ row.sigma += edge.x_ij * sigma_ij[feature_id]; /* compute the sum xT*w as needed in equations (7) and (8) */ row.xT_mu += edge.x_ij * mu_ij[feature_id]; } double prediction; double ret = ctr_predict(row, row, row.y, prediction); double predicted_target = prediction < 0 ? -1: 1; if ((predicted_target == -1 && row.y == 1) || (predicted_target == 1 && row.y == -1)) err_vec[omp_get_thread_num()] += 1.0; if (debug) std::cout<<"Prediction was: " << prediction << " real value: " << row.y << std::endl; liklihood_vec[omp_get_thread_num()] += ret; assert(row.sigma > 0); //go over all features for(int e=0; e < vertex.num_outedges(); e++) { edge_data edge = vertex.edge(e)->get_data(); uint feature_id = vertex.edge(e)->vertex_id(); assert(row.sigma > 0); double product = row.y * row.xT_mu / sqrt(row.sigma); mu_ij[feature_id] += (row.y * edge.x_ij * sigma_ij[feature_id] / sqrt(row.sigma)) * v(product); //if (debug) // std::cout<<"Added to edge: "<< vertex.edge(e)->vertex_id() << " product: " << product << " v(product): " << v(product) << " value: " <<(row.y * edge.x_ij * edge.sigma_ij * edge.sigma_ij / sqrt(row.sigma)) * v(product) << std::endl; double factor = 1.0 - (edge.x_ij * sigma_ij[feature_id] / row.sigma)*w(product); //if (debug) // std::cout<<"Added to edge: "<< vertex.edge(e)->vertex_id() << " product: " 
<< product << " w(product): " << w(product) << " factor: " << (1.0 - (edge.x_ij * edge.sigma_ij / row.sigma)*w(product)) << " sigma_ij " << edge.sigma_ij << " product: " << edge.sigma_ij * factor << std::endl; assert(factor > 0); sigma_ij[feature_id] *= factor; assert(sigma_ij[feature_id] > 0); } } }
// Helper virtual void set_latent_factor(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, latentvec_t &fact) { vertex.set_data(fact); for(int i=0; i < vertex.num_edges(); i++) { als_factor_and_weight factwght = vertex.edge(i)->get_data(); factwght.factor = fact; vertex.edge(i)->set_data(factwght); // Note that neighbors override the values they have written to edges. // This is ok, because vertices are always executed in same order. } }
/**
 * Emits every in-edge of the vertex, as (source id, this vertex id, edge value),
 * to the engine output registered under CONTRACTED_GRAPH_OUTPUT.
 * Only in-edges are visited so that deleted edges are not written out.
 */
void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
    for (int k = 0; k < vertex.num_inedges(); ++k) {
        graphchi_edge<EdgeDataType> * inbound = vertex.inedge(k);
        sharded_graph_output<VertexDataType, EdgeDataType> * sink =
            (sharded_graph_output<VertexDataType, EdgeDataType> *)gengine->output(CONTRACTED_GRAPH_OUTPUT);
        sink->output_edgeval(inbound->vertex_id(), vertex.id(), inbound->get_data());
    }
}
/** * Compute size of the relevant intersection of v and a pivot */ int intersection_size(graphchi_vertex<uint32_t, uint32_t> &v, vid_t pivot, int start_i) { assert(is_pivot(pivot)); int count = 0; if (pivot > v.id()) { dense_adj &dadj = adjs[pivot - pivot_st]; int vc = v.num_edges(); /** * If the adjacency list sizes are not too different, use * 'merge'-type of operation to compute size intersection. */ if (dadj.count < 32 * (vc - start_i)) { // TODO: do real profiling to find best cutoff value // Do merge-style of check assert(v.edge(start_i)->vertex_id() == pivot); int i1 = 0; int i2 = start_i+1; int nedges = v.num_edges(); while (i1 < dadj.count && i2 < nedges) { vid_t dst = v.edge(i2)->vertexid; vid_t a = dadj.adjlist[i1]; if (a == dst) { /* Add one to edge between v and the match */ v.edge(i2)->set_data(v.edge(i2)->get_data() + 1); count++; i1++; i2++; } else { i1 += a < dst; i2 += a > dst; } } } else { /** * Otherwise, use linear/binary search. */ vid_t lastvid = 0; for(int i=start_i+1; i < vc; i++) { vid_t nb = v.edge(i)->vertexid; if (nb > pivot && nb != lastvid) { int match = findadj(dadj.adjlist, dadj.count, nb); count += match; if (match > 0) { /* Add one to edge between v and the match */ v.edge(i)->set_data(v.edge(i)->get_data() + 1); } } lastvid = nb; } } } return count; }
/** * Vertex update function. */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { if ( vertex.num_outedges() > 0){ vertex_data & user = latent_factors_inmem[vertex.id()]; memset(&user.weight[0], 0, sizeof(double)*D); for(int e=0; e < vertex.num_outedges(); e++) { vertex_data & movie = latent_factors_inmem[vertex.edge(e)->vertex_id()]; user.weight += movie.weight; } // sqrt(|N(u)|) float usrNorm = double(1.0/sqrt(vertex.num_outedges())); //sqrt(|N(u)| * sum_j y_j user.weight *= usrNorm; vec step = zeros(D); // main algorithm, see Koren's paper, just below below equation (16) for(int e=0; e < vertex.num_outedges(); e++) { vertex_data & movie = latent_factors_inmem[vertex.edge(e)->vertex_id()]; float observation = vertex.edge(e)->get_data(); double estScore; rmse_vec[omp_get_thread_num()] += svdpp_predict(user, movie,observation, estScore); // e_ui = r_ui - \hat{r_ui} float err = observation - estScore; assert(!std::isnan(rmse_vec[omp_get_thread_num()])); vec itmFctr = movie.pvec; vec usrFctr = user.pvec; //q_i = q_i + gamma2 *(e_ui*(p_u + sqrt(N(U))\sum_j y_j) - gamma7 *q_i) for (int j=0; j< D; j++) movie.pvec[j] += svdpp.itmFctrStep*(err*(usrFctr[j] + user.weight[j]) - svdpp.itmFctrReg*itmFctr[j]); //p_u = p_u + gamma2 *(e_ui*q_i -gamma7 *p_u) for (int j=0; j< D; j++) user.pvec[j] += svdpp.usrFctrStep*(err *itmFctr[j] - svdpp.usrFctrReg*usrFctr[j]); step += err*itmFctr; //b_i = b_i + gamma1*(e_ui - gmma6 * b_i) movie.bias += svdpp.itmBiasStep*(err-svdpp.itmBiasReg* movie.bias); //b_u = b_u + gamma1*(e_ui - gamma6 * b_u) user.bias += svdpp.usrBiasStep*(err-svdpp.usrBiasReg* user.bias); } step *= float(svdpp.itmFctr2Step*usrNorm); //gamma7 double mult = svdpp.itmFctr2Step*svdpp.itmFctr2Reg; for(int e=0; e < vertex.num_edges(); e++) { vertex_data& movie = latent_factors_inmem[vertex.edge(e)->vertex_id()]; //y_j = y_j + gamma2*sqrt|N(u)| * q_i - gamma7 * y_j movie.weight += step - mult * movie.weight; } } }
/** * Vertex update function - computes the least square step */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { vertex_data & vdata = latent_factors_inmem[vertex.id()]; bool isuser = vertex.id() < M; mat XtX = mat::Zero(D, D); vec Xty = vec::Zero(D); bool compute_rmse = (vertex.num_outedges() > 0); // Compute XtX and Xty (NOTE: unweighted) for(int e=0; e < vertex.num_edges(); e++) { const edge_data & edge = vertex.edge(e)->get_data(); float observation = edge.weight; vertex_data & nbr_latent = latent_factors_inmem[vertex.edge(e)->vertex_id()]; Xty += nbr_latent.pvec * observation; XtX.triangularView<Eigen::Upper>() += nbr_latent.pvec * nbr_latent.pvec.transpose(); if (compute_rmse) { double prediction; rmse_vec[omp_get_thread_num()] += pmf_predict(vdata, nbr_latent, observation, prediction, (void*)&edge.avgprd); vertex.edge(e)->set_data(edge); } } double regularization = lambda; if (regnormal) lambda *= vertex.num_edges(); for(int i=0; i < D; i++) XtX(i,i) += regularization; // Solve the least squares problem with eigen using Cholesky decomposition mat iAi_; bool ret =inv((isuser? A_U : A_V) + alpha * XtX, iAi_); assert(ret); vec mui_ = iAi_*((isuser? (A_U*mu_U) : (A_V*mu_V)) + alpha * Xty); vdata.pvec = mvnrndex(mui_, iAi_, D, 0); assert(vdata.pvec.size() == D); }
/** * Vertex update function - computes the least square step */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { vertex_data & vdata = latent_factors_inmem[vertex.id()]; vdata.rmse = 0; mat XtX = mat::Zero(NLATENT, NLATENT); vec Xty = vec::Zero(NLATENT); bool compute_rmse = is_user(vertex.id()); // Compute XtX and Xty (NOTE: unweighted) for(int e=0; e < vertex.num_edges(); e++) { float observation = vertex.edge(e)->get_data().weight; uint time = vertex.edge(e)->get_data().time; vertex_data & nbr_latent = latent_factors_inmem[vertex.edge(e)->vertex_id()]; vertex_data & time_node = latent_factors_inmem[time]; assert(time != vertex.id() && time != vertex.edge(e)->vertex_id()); Map<vec> X(nbr_latent.pvec, NLATENT); Map<vec> Y(time_node.pvec, NLATENT); vec XY = X.cwiseProduct(Y); Xty += XY * observation; XtX.triangularView<Eigen::Upper>() += XY * XY.transpose(); if (compute_rmse) { double prediction; vdata.rmse += als_tensor_predict(vdata, nbr_latent, time_node, observation, prediction); } } for(int i=0; i < NLATENT; i++) XtX(i,i) += (lambda); // * vertex.num_edges(); // Solve the least squares problem with eigen using Cholesky decomposition Map<vec> vdata_vec(vdata.pvec, NLATENT); vdata_vec = XtX.selfadjointView<Eigen::Upper>().ldlt().solve(Xty); }
/**
 * Scores all documents for the query. The first step in update().
 *
 * For each out-edge of `query`, the score computed by `model` from the edge
 * vector's data is stored in that vector's header.
 */
void score_documents(graphchi_vertex<TypeVertex, FeatureEdge> &query, graphchi_context &ginfo) {
    // XXX
    // std::map<double, FeatureEdge> scores;
    int doc = 0;
    while (doc < query.num_outedges()) {
        FeatureEdge* features = query.outedge(doc)->get_vector();
        features->header().score = model->score(features->get_data());
        // query.outedge(doc)->set_vector(fe);
        // scores[fe.score] = fe;
        ++doc;
    }
    // for (auto rit = scores.crbegin(); rit != scores.crend(); ++rit) {
    //   std::cout << "Score " << query.id()
    //             << ": " << rit->second.str() << std::endl;
    // }
}
/** * calc distance between two items. * Let a be all the users rated item 1 * Let b be all the users rated item 2 * * 3) Using Pearson correlation * Dist_ab = (a - mean)*(b- mean)' / (std(a)*std(b)) * * 4) Using cosine similarity: * Dist_ab = (a*b) / sqrt(sum_sqr(a)) * sqrt(sum_sqr(b))) * * 5) Using chebychev: * Dist_ab = max(abs(a-b)) * * 6) Using manhatten distance: * Dist_ab = sum(abs(a-b)) * * 7) Using tanimoto: * Dist_ab = 1.0 - [(a*b) / (sum_sqr(a) + sum_sqr(b) - a*b)] * * 8) Using log likelihood similarity * Dist_ab = 1.0 - 1.0/(1.0 + loglikelihood) * * 9) Using Jaccard: * Dist_ab = intersect(a,b) / (size(a) + size(b) - intersect(a,b)) */ double calc_distance(graphchi_vertex<VertexDataType, EdgeDataType> &v, vid_t pivot, int distance_metric) { //assert(is_pivot(pivot)); //assert(is_item(pivot) && is_item(v.id())); dense_adj &pivot_edges = adjs[pivot - pivot_st]; int num_edges = v.num_edges(); dense_adj item_edges; for(int i=0; i < num_edges; i++){ set_new(item_edges.edges, v.edge(i)->vertexid, v.edge(i)->get_data()); } if (distance_metric == JACCARD_WEIGHT){ return calc_jaccard_weight_distance(pivot_edges.edges, item_edges.edges, get_val( pivot_edges.edges, v.id()), 0); } return NAN; }
/**
 * Vertex update function.
 *
 * Vertices without edges, or flagged as `seed`, are left untouched. Otherwise
 * the vertex's pvec becomes a blend of its current value (weight alpha) and
 * the cooccurence_count-weighted average of its neighbors' pvec
 * (weight 1 - alpha).
 */
void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
    vertex_data & vdata = latent_factors_inmem[vertex.id()];

    // no edges (or a seed vertex): nothing to do here
    if (vertex.num_edges() == 0 || vdata.seed)
        return;

    vec weighted_sum = zeros(D);
    double normalization = 0;
    for (int k = 0; k < vertex.num_edges(); ++k) {
        edge_data edge = vertex.edge(k)->get_data();
        vertex_data & nbr_latent = latent_factors_inmem[vertex.edge(k)->vertex_id()];
        weighted_sum += edge.cooccurence_count * nbr_latent.pvec;
        normalization += edge.cooccurence_count;
    }
    weighted_sum /= normalization;

    vdata.pvec = alpha * vdata.pvec + (1-alpha)*weighted_sum;
}
/** * Vertex update function - computes the least square step */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { vertex_data & vdata = latent_factors_inmem[vertex.id()]; mat XtX = mat::Zero(D, D); vec Xty = vec::Zero(D); bool compute_rmse = (vertex.num_outedges() > 0); // Compute XtX and Xty (NOTE: unweighted) for(int e=0; e < vertex.num_edges(); e++) { float observation = vertex.edge(e)->get_data(); vertex_data & nbr_latent = latent_factors_inmem[vertex.edge(e)->vertex_id()]; Xty += nbr_latent.pvec * observation; XtX += nbr_latent.pvec * nbr_latent.pvec.transpose(); if (compute_rmse) { double prediction; rmse_vec[omp_get_thread_num()] += sparse_als_predict(vdata, nbr_latent, observation, prediction); } } double regularization = lambda; if (regnormal) lambda *= vertex.num_edges(); for(int i=0; i < D; i++) XtX(i,i) += regularization; bool isuser = vertex.id() < (uint)M; if (algorithm == SPARSE_BOTH_FACTORS || (algorithm == SPARSE_USR_FACTOR && isuser) || (algorithm == SPARSE_ITM_FACTOR && !isuser)){ double sparsity_level = 1.0; if (isuser) sparsity_level -= user_sparsity; else sparsity_level -= movie_sparsity; vdata.pvec = CoSaMP(XtX, Xty, (int)ceil(sparsity_level*(double)D), 10, 1e-4, D); } else vdata.pvec = XtX.selfadjointView<Eigen::Upper>().ldlt().solve(Xty); }
/**
 * Writes every out-edge of the vertex as "src\tdst" to one of two files:
 * `fpout` when the edge's two labels are equal and the label for this vertex
 * matches `root`, `fpout1` otherwise. Each write happens under `lock`.
 */
void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
    assert(vertex.num_inedges() * vertex.num_outedges() <= product);

    for (int k = 0; k < vertex.num_outedges(); ++k) {
        bidirectional_label edgedata = vertex.outedge(k)->get_data();
        const bool matches_root = edgedata.is_equal()
            && root == edgedata.my_label(vertex.id(), vertex.outedge(k)->vertexid);

        lock.lock();
        if (matches_root) {
            fprintf(fpout, "%u\t%u\n", vertex.id(), vertex.outedge(k)->vertexid);
        } else {
            fprintf(fpout1, "%u\t%u\n", vertex.id(), vertex.outedge(k)->vertexid);
        }
        lock.unlock();
    }
}
/**
 * On iteration 0, classifies every vertex that has edges into one of three
 * counters (updated under `lock`):
 *   left   - not confirmed
 *   middle - confirmed and reconfirmed
 *   right  - confirmed but not reconfirmed
 * Other iterations do nothing.
 */
void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
    // assert(vertex.num_inedges() * vertex.num_outedges() <= product);
    if (vertex.num_edges() == 0)
        return;
    if (gcontext.iteration != 0)
        return;

    VertexDataType vertexdata = vertex.get_data();
    lock.lock();
    if (!vertexdata.confirmed) {
        left++;
    } else if (vertexdata.reconfirmed) {
        middle++;
    } else {
        right++;
    }
    lock.unlock();

    /*
    for(int i=0; i<vertex.num_outedges(); i++){
        bidirectional_label edgedata = vertex.outedge(i)->get_data();
        if(edgedata.is_equal()){
            if(root == edgedata.my_label(vertex.id(), vertex.outedge(i)->vertexid)){
                lock.lock();
                fprintf(fpout, "%u\t%u\n", vertex.id(), vertex.outedge(i)->vertexid);
                lock.unlock();
                continue;
            }
        }
        lock.lock();
        fprintf(fpout1, "%u\t%u\n", vertex.id(), vertex.outedge(i)->vertexid);
        lock.unlock();
    }
    */
}
/** * Pagerank update function. */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &v, graphchi_context &ginfo) { float sum=0; float prv = 0.0; float pagerankcont = 0.0; if (ginfo.iteration == 0) { /* On first iteration, initialize vertex and out-edges. The initialization is important, because on every run, GraphChi will modify the data in the edges on disk. */ /* For the weighted version */ update_edge_data(v, 1.0, true); v.set_data(RANDOMRESETPROB); //v.set_data(1.0); } else { /* We need to come up with the weighted version */ for(int i=0; i < v.num_inedges(); i++) { chivector<float> * evector = v.inedge(i)->get_vector(); assert(evector->size() >= 2); sum += evector->get(1); //std::cout << v.id() << " with data: " << evector->get(1) << " with weight " << evector->get(0) << std::endl; //std::cout << v.id() << " edge endpoint: " << v.inedge(i)->vertex_id() << std::endl; //evector->clear(); } /* Compute my pagerank */ prv = RANDOMRESETPROB + (1 - RANDOMRESETPROB) * sum; //std::cout << "sum" << sum << "pagerank: " << prv << std::endl; update_edge_data(v, prv, false); /* Keep track of the progression of the computation. GraphChi engine writes a file filename.deltalog. */ double delta = std::abs(prv - v.get_data()); //std::cout << "pagerank: " << prv << "v.data" << v.get_data() << "delta: " << delta << std::endl; ginfo.log_change(delta); /* Set my new pagerank as the vertex value */ v.set_data(prv); } }
/** * Vertex update function - computes the least square step */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { vertex_data & vdata = latent_factors_inmem[vertex.id()]; vdata.rmse = 0; mat XtX = mat::Zero(NLATENT, NLATENT); vec Xty = vec::Zero(NLATENT); bool compute_rmse = (vertex.num_outedges() > 0); // Compute XtX and Xty (NOTE: unweighted) for(int e=0; e < vertex.num_edges(); e++) { float observation = vertex.edge(e)->get_data(); vertex_data & nbr_latent = latent_factors_inmem[vertex.edge(e)->vertex_id()]; Map<vec> X(nbr_latent.pvec, NLATENT); Xty += X * observation; XtX += X * X.transpose(); if (compute_rmse) { double prediction; vdata.rmse += sparse_als_predict(vdata, nbr_latent, observation, prediction); } } for(int i=0; i < NLATENT; i++) XtX(i,i) += (lambda); // * vertex.num_edges(); bool isuser = vertex.id() < (uint)M; Map<vec> vdata_vec(vdata.pvec, NLATENT); if (algorithm == SPARSE_BOTH_FACTORS || (algorithm == SPARSE_USR_FACTOR && isuser) || (algorithm == SPARSE_ITM_FACTOR && !isuser)){ double sparsity_level = 1.0; if (isuser) sparsity_level -= user_sparsity; else sparsity_level -= movie_sparsity; vdata_vec = CoSaMP(XtX, Xty, ceil(sparsity_level*(double)NLATENT), 10, 1e-4, NLATENT); } else vdata_vec = XtX.selfadjointView<Eigen::Upper>().ldlt().solve(Xty); }
/** * Vertex update function. * On first iteration ,each vertex chooses a label = the vertex id. * On subsequent iterations, each vertex chooses the minimum of the neighbor's * label (and itself). */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { /* This program requires selective scheduling. */ assert(gcontext.scheduler != NULL); if(gcontext.iteration == 0) { set_data(vertex, vertex.id()); /* Schedule neighbor for update */ gcontext.scheduler->add_task(vertex.id()); return; } else { vid_t curmin = vertex_values[vertex.id()]; for(int i=0; i < vertex.num_edges(); i++) { vid_t nblabel = neighbor_value(vertex.edge(i)); curmin = std::min(nblabel, curmin); } if ( curmin < vertex.get_data() ) { for(int i=0; i < vertex.num_edges(); i++) { if (curmin < neighbor_value(vertex.edge(i))) { /* Schedule neighbor for update */ gcontext.scheduler->add_task(vertex.edge(i)->vertex_id()); } } set_data(vertex, curmin); } } /* On subsequent iterations, find the minimum label of my neighbors */ /* If my label changes, schedule neighbors */ }
/**
 * Grab pivot's adjacency list into memory.
 *
 * Copies (neighbor id, up_weight) for every edge of the pivot user `v` into a
 * dense_adj, stores it in adjs[v.id() - pivot_st], and atomically adds the
 * edge count to grabbed_edges.
 *
 * Fix: the bounds assertion on `adjs` now runs before the slot is written;
 * previously it ran after the assignment, too late to catch a bad index.
 *
 * @param v  a vertex asserted to be both a pivot and a user
 * @return   the number of edges of v
 */
int load_edges_into_memory(graphchi_vertex<uint32_t, edge_data> &v) {
    assert(is_pivot(v.id()));
    assert(is_user(v.id()));
    // Validate the destination slot before anything is written into it.
    assert(v.id() - pivot_st < adjs.size());

    int num_edges = v.num_edges();

    dense_adj dadj;
    for(int i=0; i<num_edges; i++)
        set_new( dadj.edges, v.edge(i)->vertex_id(), v.edge(i)->get_data().up_weight);
    //dadj.ratings = zeros(N);
    dadj.vid = v.id();

    adjs[v.id() - pivot_st] = dadj;
    __sync_add_and_fetch(&grabbed_edges, num_edges /*edges_to_larger_id*/);
    return num_edges;
}
/**
 * Grab pivot's adjacency list into memory.
 *
 * Pivots with fewer than min_allowed_intersection edges are skipped (returns
 * 0). Otherwise (neighbor id, edge value) for every edge of `v` is copied into
 * a dense_adj, stored in adjs[v.id() - pivot_st], and the edge count is
 * atomically added to grabbed_edges.
 *
 * Fix: the bounds assertion on `adjs` now runs before the slot is written;
 * previously it ran after the assignment, too late to catch a bad index.
 *
 * @param v  the pivot vertex
 * @return   the number of edges loaded, or 0 when the pivot is skipped
 */
int load_edges_into_memory(graphchi_vertex<VertexDataType, EdgeDataType> &v) {
    //assert(is_pivot(v.id()));
    //assert(is_item(v.id()));

    int num_edges = v.num_edges();
    //not enough user rated this item, we don't need to compare to it
    if (num_edges < min_allowed_intersection){
        if (debug)
            logstream(LOG_DEBUG)<<"Skipping since num edges: " << num_edges << std::endl;
        return 0;
    }

    // Copy every edge of v into the in-memory adjacency structure.
    dense_adj dadj;
    for(int i=0; i<num_edges; i++)
        set_new( dadj.edges, v.edge(i)->vertex_id(), v.edge(i)->get_data());

    //std::sort(&dadj.adjlist[0], &dadj.adjlist[0] + num_edges);

    // Validate the destination slot before it is written.
    assert(v.id() - pivot_st < adjs.size());
    adjs[v.id() - pivot_st] = dadj;
    __sync_add_and_fetch(&grabbed_edges, num_edges /*edges_to_larger_id*/);
    return num_edges;
}
/** The actual LambdaRank implementation. */ virtual void compute_gradients( graphchi_vertex<TypeVertex, FeatureEdge> &query, Gradient* umodel) { std::vector<double> lambdas(query.num_outedges()); std::vector<double> s_is(query.num_outedges()); /* First, we compute all the outputs... */ for (int i = 0; i < query.num_outedges(); i++) { s_is[i] = get_score(query.outedge(i)); // std::cout << "s[" << i << "] == " << s_is[i] << std::endl; } /* ...and the retrieval measure scores. */ opt.compute(query); /* Now, we compute the errors (lambdas). */ for (int i = 0; i < query.num_outedges() - 1; i++) { int rel_i = get_relevance(query.outedge(i)); for (int j = i + 1; j < query.num_outedges(); j++) { int rel_j = get_relevance(query.outedge(j)); if (rel_i != rel_j) { double S_ij = rel_i > rel_j ? 1 : -1; double lambda_ij = dC_per_ds_i(S_ij, s_is[i], s_is[j]) * fabs(opt.delta(query, i, j)); /* lambda_ij = -lambda_ji */ lambdas[i] += lambda_ij; lambdas[j] -= lambda_ij; } } } /* Finally, the model update. */ for (int i = 0; i < query.num_outedges(); i++) { // -lambdas[i], as C is a utility function in this case umodel->update(query.outedge(i)->get_vector()->get_data(), s_is[i], lambdas[i]); } }
/**
 * Handles vertices that have edges and are NOT both confirmed and reconfirmed.
 *
 * Iteration 0: assigns a fresh colour, from getNewIdRight() for confirmed
 * vertices and from getNewIdLeft() otherwise, and stores it on the vertex.
 * Later iterations: writes "vertex id\tcolour" to `vmap` under `lock`.
 */
void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
    if (vertex.num_edges() == 0)
        return;

    VertexDataType vertexdata = vertex.get_data();
    if (vertexdata.confirmed && vertexdata.reconfirmed)
        return;
    //assert(vertex.num_inedges() * vertex.num_outedges() <= product);

    if (gcontext.iteration != 0) {
        /*
        for(int i=0; i<vertex.num_outedges(); i++){
            bidirectional_label edgedata = vertex.outedge(i)->get_data();
            if(edgedata.is_equal()){
                if(root == edgedata.my_label(vertex.id(), vertex.outedge(i)->vertexid)){
                    lock.lock();
                    fprintf(fpout, "%u\t%u\n", vertex.id(), vertex.outedge(i)->vertexid);
                    lock.unlock();
                    continue;
                }
            }
            lock.lock();
            fprintf(fpout1, "%u\t%u\n", vertex.id(), vertex.outedge(i)->vertexid);
            lock.unlock();
        }
        */
        lock.lock();
        fprintf(vmap, "%u\t%u\n", vertex.id(), vertexdata.color);
        lock.unlock();
        return;
    }

    if (vertexdata.confirmed) {
        vertexdata.color = getNewIdRight();
    } else {
        vertexdata.color = getNewIdLeft();
    }
    vertex.set_data(vertexdata);
}
/** * Vertex update function. */ void update(graphchi_vertex<VertexDataType, edge_data> &v, graphchi_context &gcontext) { if (debug) printf("Entered iteration %d with %d\n", gcontext.iteration, is_item(v.id()) ? (v.id() - M + 1): v.id()); /* Even iteration numbers: * 1) load a subset of users into memory (pivots) * 2) Find which subset of items is connected to the users */ if (gcontext.iteration % 2 == 0) { if (adjcontainer->is_pivot(v.id()) && is_user(v.id())) { adjcontainer->load_edges_into_memory(v); if (debug) printf("Loading pivot %d intro memory\n", v.id()); } } /* odd iteration number: * 1) For any item connected to a pivot item * compute itersection */ else { assert(is_item(v.id())); for (int i=0; i< v.num_edges(); i++) { if (!adjcontainer->is_pivot(v.edge(i)->vertex_id())) continue; if (debug) printf("comparing user pivot %d to item %d\n", v.edge(i)->vertex_id()+1 , v.id() - M + 1); adjcontainer->compute_ratings(v, v.edge(i)->vertex_id(), v.edge(i)->get_data().up_weight); item_pairs_compared++; if (item_pairs_compared % 1000000 == 0) Rcpp::Rcout<< std::setw(10) << mytimer.current_time() << ") " << std::setw(10) << item_pairs_compared << " pairs compared " << std::endl; } }//end of iteration % 2 == 1 }//end of update function
/** * Update the weigthed edge chivector * We first obtain the edge weight from the first element, sum them, then update the * second item by eacg edge's weight */ void update_edge_data(graphchi_vertex<VertexDataType, EdgeDataType> &v, float quota, bool first){ float sum = 0.0; //if(first) for(int i=0; i < v.num_outedges(); i++) { graphchi_edge<EdgeDataType> * edge = v.outedge(i); if (edge != NULL) { chivector<float> * evector = edge->get_vector(); //std::cout << evector->size() << std::endl; /*if (first) assert(evector->size() == 1); else assert(evector->size() == 2); assert(evector->size() == 2);*/ std::cout << v.id() << " with data: " << evector->get(0) << std::endl; sum += evector->get(0); /*if (first){ evector->add(sum); assert(evector->size() == 2); }*/ } } for(int i=0; i < v.num_outedges(); i++) { graphchi_edge<EdgeDataType> * edge = v.outedge(i); if (edge != NULL) { chivector<float> * evector = edge->get_vector(); // assert(evector->size() == 2); float val = quota * evector->get(0) / sum; //evector->set(1, val); if(first && (evector->size() == 1)) evector->add(val); evector->set(1, val); //std::cout << v.id() << " with data: " << evector->get(0) << std::endl; } } }
/**
 * Runs only for query vertices; the real work is split across helper methods
 * because not every step is needed in each phase.
 *
 * Queries are counted on iteration 0. Every query has its documents scored;
 * gradients are computed only in TRAINING, and the model is evaluated in
 * TRAINING, VALIDATION and TESTING.
 */
void update(graphchi_vertex<TypeVertex, FeatureEdge> &v, graphchi_context &ginfo) {
    // TODO Use a scheduler instead of this?
    // Only queries have outedges (TODO: ???)
    if (v.get_data().type != QUERY)
        return;

    /* We count the number of queries. */
    if (ginfo.iteration == 0) {
        num_queries++;
    }

    score_documents(v, ginfo);

    if (phase == TRAINING) {
        compute_gradients(v, parallel_models[omp_get_thread_num()]);
    }

    const bool evaluating = (phase == TRAINING || phase == VALIDATION || phase == TESTING);
    if (evaluating) {
        evaluate_model(v, ginfo);
    }
}
/** * Vertex update function. */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { if (vertex.id() < (uint)mi.start || vertex.id() >= (uint)mi.end) return; vertex_data& user = latent_factors_inmem[vertex.id()]; bool rows = vertex.id() < (uint)info.get_start_node(false); if (info.is_square()) rows = mi.A_transpose; (void) rows; // unused assert(mi.r_offset >=0); //store previous value for convergence detection if (mi.prev_offset >= 0) user.pvec[mi.prev_offset ] = user.pvec[mi.r_offset]; double val = 0; assert(mi.x_offset >=0 || mi.y_offset>=0); /*** COMPUTE r = c*A*x ********/ if (mi.A_offset && mi.x_offset >= 0){ for(int e=0; e < vertex.num_edges(); e++) { const edge_data & edge = vertex.edge(e)->get_data(); const vertex_data & movie = latent_factors_inmem[vertex.edge(e)->vertex_id()]; val += (edge.weight * movie.pvec[mi.x_offset]); } if (info.is_square() && mi.use_diag)// add the diagonal term val += (/*mi.c**/ (user.A_ii+ regularization) * user.pvec[mi.x_offset]); val *= mi.c; } /***** COMPUTE r = c*I*x *****/ else if (!mi.A_offset && mi.x_offset >= 0){ val = mi.c*user.pvec[mi.x_offset]; } /**** COMPUTE r+= d*y (optional) ***/ if (mi.y_offset>= 0){ val += mi.d*user.pvec[mi.y_offset]; } /***** compute r = (... ) / div */ if (mi.div_offset >= 0){ val /= user.pvec[mi.div_offset]; } assert(mi.r_offset>=0 && mi.r_offset < user.pvec.size()); user.pvec[mi.r_offset] = val; } //end update