Example #1
0
  /**
   *  Vertex update function - computes the least square step.
   *
   *  Accumulates XtX and Xty over every edge of the vertex from the latent
   *  vectors of its neighbors, adds the regularization term to the diagonal
   *  of XtX and stores the solution in this vertex's latent vector. The
   *  solution comes from CoSaMP when `algorithm` requests a sparse factor for
   *  this side (user or item), otherwise from an LDLT solve.
   *  For vertices that have out-edges, the value returned by
   *  sparse_als_predict for each edge is added to the calling thread's slot
   *  of rmse_vec.
   *
   *  @param vertex   vertex being updated; its id indexes latent_factors_inmem
   *  @param gcontext engine context (not used here)
   */
  void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
    vertex_data & vdata = latent_factors_inmem[vertex.id()];
    mat XtX = mat::Zero(D, D); 
    vec Xty = vec::Zero(D);

    bool compute_rmse = (vertex.num_outedges() > 0);
    // Compute XtX and Xty (NOTE: unweighted)
    for(int e=0; e < vertex.num_edges(); e++) {
      float observation = vertex.edge(e)->get_data();                
      vertex_data & nbr_latent = latent_factors_inmem[vertex.edge(e)->vertex_id()];
      Xty += nbr_latent.pvec * observation;
      XtX += nbr_latent.pvec * nbr_latent.pvec.transpose();
      if (compute_rmse) {
        double prediction;
        rmse_vec[omp_get_thread_num()] += sparse_als_predict(vdata, nbr_latent, observation, prediction);
      }
    }

    // Scale the *local* regularization by the edge count when regnormal is
    // set. The previous code multiplied the shared `lambda` instead, which
    // left `regularization` unscaled and made lambda grow on every call.
    double regularization = lambda;
    if (regnormal)
      regularization *= vertex.num_edges();
    for(int i=0; i < D; i++) XtX(i,i) += regularization;


    bool isuser = vertex.id() < (uint)M;
    if (algorithm == SPARSE_BOTH_FACTORS || (algorithm == SPARSE_USR_FACTOR && isuser) || 
        (algorithm == SPARSE_ITM_FACTOR && !isuser)){ 
      // sparsity_level = 1 - configured sparsity of this side
      double sparsity_level = 1.0;
      if (isuser)
        sparsity_level -= user_sparsity;
      else sparsity_level -= movie_sparsity;
      vdata.pvec = CoSaMP(XtX, Xty, (int)ceil(sparsity_level*(double)D), 10, 1e-4, D); 
    }
    else vdata.pvec = XtX.selfadjointView<Eigen::Upper>().ldlt().solve(Xty);
  }
 /**
   * Grab pivot's adjacency list into memory.
   *
   * For a pivot vertex, sorts its edges, then stores the distinct neighbor
   * ids that are larger than the vertex's own id into a freshly allocated
   * dense_adj, places it in adjs[v.id() - pivot_st] and adds the number of
   * stored ids to grabbed_edges.
   *
   * @param v vertex to inspect
   * @return number of neighbor ids stored; 0 when v is not a pivot
   */
 int grab_adj(graphchi_vertex<uint32_t, uint32_t> &v) {
     if (!is_pivot(v.id())) {
         return 0;
     }

     int ncount = v.num_edges();
     // Sorting makes duplicate neighbor ids adjacent, so comparing against
     // the previous id is enough to skip them.
     v.sort_edges_indirect();

     // First pass: count how many distinct neighbors have larger id than v.
     int actcount = 0;
     vid_t lastvid = 0;
     for(int i=0; i<ncount; i++) {
         vid_t nbr = v.edge(i)->vertex_id();
         if (nbr > v.id() && nbr != lastvid)
             actcount++;  // Need to store only ids larger than me
         lastvid = nbr;
     }

     // Allocate the in-memory adjacency list, using the
     // knowledge of the number of edges.
     dense_adj dadj = dense_adj(actcount, (vid_t*) calloc(actcount, sizeof(vid_t)));

     // Second pass: fill the list with the same filter as the first pass.
     actcount = 0;
     lastvid = 0;
     for(int i=0; i<ncount; i++) {
         vid_t nbr = v.edge(i)->vertex_id();
         if (nbr > v.id() && nbr != lastvid) {
             dadj.adjlist[actcount++] = nbr;
         }
         lastvid = nbr;
     }
     assert(dadj.count == actcount);

     // Check the slot index before writing to it, not after.
     assert(v.id() - pivot_st < adjs.size());
     adjs[v.id() - pivot_st] = dadj;
     __sync_add_and_fetch(&grabbed_edges, actcount);
     return actcount;
 }
Example #3
0
	/**
	 *  Vertex update function. Work is done only on iteration 0 and only for
	 *  vertices whose data has both `confirmed` and `reconfirmed` set.
	 *  For each out-edge whose label reports is_equal() and whose label on
	 *  this vertex's side equals `root`, a "<source>\t<target>" line is
	 *  written to fpout while holding `lock`.
	 *  (Writing the remaining edges to fpout1 is disabled in this variant.)
	 */
	void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
		if(gcontext.iteration != 0)
			return;

		VertexDataType info = vertex.get_data();
		if(!(info.confirmed && info.reconfirmed))
			return;

		assert(vertex.num_inedges() * vertex.num_outedges() <= product);

		for(int k=0; k<vertex.num_outedges(); k++){
			bidirectional_label label = vertex.outedge(k)->get_data();
			// Both tests must hold: equal label pair, and my side carries the root label.
			if(label.is_equal() && root == label.my_label(vertex.id(), vertex.outedge(k)->vertexid)){
				lock.lock();
				fprintf(fpout, "%u\t%u\n", vertex.id(), vertex.outedge(k)->vertexid);
				lock.unlock();
			}
		}
	}
Example #4
0
  /**
   *  Vertex update function - least squares step of the tensor variant.
   *
   *  Every edge carries a weight and a time index. The time index selects a
   *  second entry of latent_factors_inmem; the element-wise product of the
   *  neighbor's vector and that time entry's vector is used to accumulate
   *  Xty and the upper triangle of XtX. After adding lambda to the diagonal
   *  the system is solved with Eigen's LDLT and written to this vertex's
   *  pvec. For vertices for which is_user() holds, the values returned by
   *  als_tensor_predict are summed into the vertex's rmse field.
   */
  void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
    vertex_data & self = latent_factors_inmem[vertex.id()];
    self.rmse = 0;

    vec Xty = vec::Zero(NLATENT);
    mat XtX = mat::Zero(NLATENT, NLATENT); 
    const bool accumulate_rmse = is_user(vertex.id()); 

    // Accumulate the normal equations (unweighted)
    for(int k=0; k < vertex.num_edges(); k++) {
      float rating = vertex.edge(k)->get_data().weight;                
      uint time_id = vertex.edge(k)->get_data().time;
      vertex_data & neighbor = latent_factors_inmem[vertex.edge(k)->vertex_id()];
      vertex_data & time_factor = latent_factors_inmem[time_id];
      // The time entry must be distinct from both endpoints of the edge.
      assert(time_id != vertex.id() && time_id != vertex.edge(k)->vertex_id());

      Map<vec> nbr_vec(neighbor.pvec, NLATENT);
      Map<vec> time_vec(time_factor.pvec, NLATENT);
      vec joint = nbr_vec.cwiseProduct(time_vec);
      Xty += joint * rating;
      XtX.triangularView<Eigen::Upper>() += joint * joint.transpose();

      if (accumulate_rmse) {
        double prediction;
        self.rmse += als_tensor_predict(self, neighbor, time_factor, rating, prediction);
      }
    }

    // Regularization on the diagonal
    for(int d=0; d < NLATENT; d++)
      XtX(d,d) += (lambda);

    // Only the upper triangle was filled, so solve through a self-adjoint view.
    Map<vec> solution(self.pvec, NLATENT);
    solution = XtX.selfadjointView<Eigen::Upper>().ldlt().solve(Xty);
  }
Example #5
0
  /**
   *  Vertex update function - draws a new latent vector for the vertex.
   *
   *  Accumulates Xty and the upper triangle of XtX over all edges, adds the
   *  regularization term to the diagonal, and then:
   *    iAi_ = inv(A + alpha * XtX)
   *    mui_ = iAi_ * (A * mu + alpha * Xty)
   *  where (A, mu) is (A_U, mu_U) for user vertices (id < M) and
   *  (A_V, mu_V) otherwise. The new pvec is the result of
   *  mvnrndex(mui_, iAi_, D, 0).
   *  For vertices with out-edges the value returned by pmf_predict is added
   *  to the calling thread's slot of rmse_vec and the edge data is written
   *  back with set_data.
   *
   *  @param vertex   vertex being updated
   *  @param gcontext engine context (not used here)
   */
  void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
    vertex_data & vdata = latent_factors_inmem[vertex.id()];
    bool isuser = vertex.id() < M;
    mat XtX = mat::Zero(D, D); 
    vec Xty = vec::Zero(D);

    bool compute_rmse = (vertex.num_outedges() > 0);
    // Compute XtX and Xty (NOTE: unweighted)
    for(int e=0; e < vertex.num_edges(); e++) {
      const edge_data & edge = vertex.edge(e)->get_data();
      float observation = edge.weight;                
      vertex_data & nbr_latent = latent_factors_inmem[vertex.edge(e)->vertex_id()];
      Xty += nbr_latent.pvec * observation;
      XtX.triangularView<Eigen::Upper>() += nbr_latent.pvec * nbr_latent.pvec.transpose();
      if (compute_rmse) {
        double prediction;
        // pmf_predict receives a pointer to edge.avgprd; the edge is stored back afterwards.
        rmse_vec[omp_get_thread_num()] += pmf_predict(vdata, nbr_latent, observation, prediction, (void*)&edge.avgprd);
        vertex.edge(e)->set_data(edge);
      }
    }

    // Scale the *local* regularization by the edge count when regnormal is
    // set. The previous code multiplied the shared `lambda` instead, which
    // left `regularization` unscaled and made lambda grow on every call.
    double regularization = lambda;
    if (regnormal)
      regularization *= vertex.num_edges();
    for(int i=0; i < D; i++) XtX(i,i) += regularization;

    // Invert the combined matrix, build the mean vector and sample from it
    mat iAi_;
    bool ret =inv((isuser? A_U : A_V) + alpha *  XtX, iAi_);
    assert(ret);
    vec mui_ =  iAi_*((isuser? (A_U*mu_U) : (A_V*mu_V)) + alpha * Xty); 
    vdata.pvec = mvnrndex(mui_, iAi_, D, 0); 
    assert(vdata.pvec.size() == D);
 }
 /**
  *  Vertex update function.
  *
  *  Iteration 0: every out-edge vector is cleared and receives the id of
  *  this vertex as its only element.
  *  Later iterations: every in-edge vector must hold at least
  *  `gcontext.iteration` elements and element j must equal
  *  (id of the vertex at the other end) + j; a mismatch is printed and then
  *  trips an assert. Afterwards (vertex id + iteration) is appended to each
  *  out-edge vector.
  *  In all cases the vertex value is set to iteration + 1.
  */
 void update(graphchi_vertex<VertexDataType, EdgeDataType > &vertex, graphchi_context &gcontext) {
     const bool first_round = (gcontext.iteration == 0);

     if (first_round) {
         for(int k=0; k < vertex.num_outedges(); k++) {
             chivector<vid_t> * out_vector = vertex.outedge(k)->get_vector();
             out_vector->clear();
             assert(out_vector->size() == 0);

             out_vector->add(vertex.id());
             assert(out_vector->size() == 1);
             assert(out_vector->get(0) == vertex.id());
         }
     } else {
         // Verify what the neighbors wrote in the earlier iterations
         for(int k=0; k < vertex.num_inedges(); k++) {
             graphchi_edge<EdgeDataType> * in_edge = vertex.inedge(k);
             chivector<vid_t> * in_vector = in_edge->get_vector();
             assert(in_vector->size() >= gcontext.iteration);
             for(int pos=0; pos < in_vector->size(); pos++) {
                 vid_t wanted = in_edge->vertex_id() + pos;
                 vid_t found = in_vector->get(pos);
                 if (found != wanted) {
                     std::cout << "Mismatch: " << found << " != " << wanted << std::endl;
                 }
                 assert(found == wanted);
             }
         }
         // Extend each outgoing vector by one element for this iteration
         for(int k=0; k < vertex.num_outedges(); k++) {
             vertex.outedge(k)->get_vector()->add(vertex.id() + gcontext.iteration);
         }
     }

     vertex.set_data(gcontext.iteration + 1);
 }
Example #7
0
  /**
   *  Vertex update function - computes the least square step.
   *
   *  Builds XtX and Xty from the latent vectors of all neighbors, adds
   *  lambda to the diagonal and writes the solution into this vertex's pvec.
   *  CoSaMP is used when `algorithm` selects a sparse factor for this side
   *  (user when id < M, item otherwise); an LDLT solve is used in all other
   *  cases. For vertices with out-edges, the values returned by
   *  sparse_als_predict are summed into the vertex's rmse field.
   */
  void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
    vertex_data & self = latent_factors_inmem[vertex.id()];
    self.rmse = 0;

    vec Xty = vec::Zero(NLATENT);
    mat XtX = mat::Zero(NLATENT, NLATENT); 
    const bool accumulate_rmse = (vertex.num_outedges() > 0);

    // Accumulate the normal equations (unweighted)
    for(int k=0; k < vertex.num_edges(); k++) {
      float rating = vertex.edge(k)->get_data();                
      vertex_data & neighbor = latent_factors_inmem[vertex.edge(k)->vertex_id()];
      Map<vec> nbr_vec(neighbor.pvec, NLATENT);
      Xty += nbr_vec * rating;
      XtX += nbr_vec * nbr_vec.transpose();
      if (accumulate_rmse) {
        double prediction;
        self.rmse += sparse_als_predict(self, neighbor, rating, prediction);
      }
    }

    // Regularization on the diagonal
    for(int d=0; d < NLATENT; d++)
      XtX(d,d) += (lambda);

    bool isuser = vertex.id() < (uint)M;
    Map<vec> solution(self.pvec, NLATENT);

    const bool sparse_side = algorithm == SPARSE_BOTH_FACTORS ||
                             (algorithm == SPARSE_USR_FACTOR && isuser) ||
                             (algorithm == SPARSE_ITM_FACTOR && !isuser);
    if (!sparse_side) {
      solution = XtX.selfadjointView<Eigen::Upper>().ldlt().solve(Xty);
      return;
    }

    // sparsity_level = 1 - configured sparsity of this side
    double sparsity_level = 1.0;
    if (isuser)
      sparsity_level -= user_sparsity;
    else
      sparsity_level -= movie_sparsity;
    solution = CoSaMP(XtX, Xty, ceil(sparsity_level*(double)NLATENT), 10, 1e-4, NLATENT); 
  }
Example #8
0
  /**
   *  Vertex update function with two modes, selected by `edge_count`.
   *
   *  edge_count unset: the vertex takes the minimum of its own label and the
   *  labels of its neighbors; when that lowers the label, `changes` is
   *  incremented and the new label is stored. On iteration 0 every vertex
   *  that has edges is counted in `actual_vertices`.
   *
   *  edge_count set: no label is written. For each edge whose other endpoint
   *  has a larger id than this vertex, state[<running minimum label so far>]
   *  is incremented under the mutex.
   */
  void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {

    if (edge_count){
      vid_t running_min = vertex_values[vertex.id()];
      for(int k=0; k < vertex.num_edges(); k++) {
        vid_t label = neighbor_value(vertex.edge(k));
        running_min = std::min(label, running_min);
        // Only the endpoint with the smaller id accounts for the edge.
        if (vertex.edge(k)->vertex_id() > vertex.id()){
          mymutex.lock();
          state[running_min]++;
          mymutex.unlock();
        }
      }
      return;
    }

    /* Label propagation: find the minimum label among me and my neighbors */
    vid_t smallest = vertex_values[vertex.id()];
    if (gcontext.iteration == 0 && vertex.num_edges() > 0){
      mymutex.lock();
      actual_vertices++;
      mymutex.unlock();
    }
    for(int k=0; k < vertex.num_edges(); k++) {
      vid_t label = neighbor_value(vertex.edge(k));
      smallest = std::min(label, smallest);
    }

    if (vertex_values[vertex.id()] > smallest) {
      changes++;
      set_data(vertex, smallest);
    }
  }
Example #9
0
	/**
	 *  Vertex update function.
	 *
	 *  Runs only for vertices with out-edges (asserted to have id < M).
	 *  Pass 1 over the out-edges sums x_ij * sigma_ij[feature] into row.sigma
	 *  (starting from beta*beta) and x_ij * mu_ij[feature] into row.xT_mu.
	 *  ctr_predict is then called; its return value is added to the thread's
	 *  liklihood_vec slot, and the thread's err_vec slot is incremented when
	 *  the sign of the prediction disagrees with row.y.
	 *  Pass 2 over the out-edges updates mu_ij and sigma_ij of each feature
	 *  using v(product) and w(product).
	 */
	void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
		// Only sample (row) vertices carry out-edges.
		if (vertex.num_outedges() <= 0)
			return;

		assert(vertex.id() < M);
		vertex_data & row = latent_factors_inmem[vertex.id()];
		assert(row.y == -1 || row.y == 1);

		if (debug)
			std::cout<<"Entered item " << vertex.id() << " y: " << row.y << std::endl;

		row.sigma = beta*beta;
		row.xT_mu = 0;

		// Pass 1: accumulate over all features of this sample
		for(int k=0; k < vertex.num_outedges(); k++) {
			uint feature = vertex.edge(k)->vertex_id();
			edge_data feature_edge = vertex.edge(k)->get_data();

			assert(sigma_ij[feature] > 0);
			assert(feature_edge.x_ij  == 1);

			/* compute equation (6) */
			row.sigma += feature_edge.x_ij * sigma_ij[feature];
			/* compute the sum xT*w as needed in equations (7) and (8) */
			row.xT_mu += feature_edge.x_ij * mu_ij[feature];
		}

		double prediction;
		double likelihood = ctr_predict(row, row, row.y, prediction);
		double predicted_target = prediction < 0 ? -1: 1;
		// Count a misclassification when the predicted sign differs from the label.
		if ((predicted_target == -1  && row.y == 1) || (predicted_target == 1 && row.y == -1))
			err_vec[omp_get_thread_num()] += 1.0;
		if (debug)
			std::cout<<"Prediction was: " << prediction << " real value: " << row.y << std::endl;
		liklihood_vec[omp_get_thread_num()] += likelihood;

		assert(row.sigma > 0);

		// Pass 2: update mean and variance of every feature
		for(int k=0; k < vertex.num_outedges(); k++) {
			edge_data feature_edge = vertex.edge(k)->get_data();
			uint feature = vertex.edge(k)->vertex_id();
			assert(row.sigma > 0);

			double product = row.y * row.xT_mu / sqrt(row.sigma);
			mu_ij[feature] +=  (row.y * feature_edge.x_ij *  sigma_ij[feature]  / sqrt(row.sigma)) * v(product);

			double factor = 1.0 - (feature_edge.x_ij * sigma_ij[feature] / row.sigma)*w(product);
			assert(factor > 0);

			sigma_ij[feature] *= factor;
			assert(sigma_ij[feature] > 0);
		}
	}
Example #10
0
  /**
   *  Vertex update function - computes candidate items for one user.
   *
   *  Only vertices with id < M are processed. For such a vertex the function
   *  scores items (ids M .. M+N-1) with als_predict, then keeps the result
   *  of reverse_sort_index2 limited to num_ratings entries in vdata.ids /
   *  vdata.ratings.
   *
   *  - knn_sample_percent == 1.0: every item that is not already connected
   *    to the vertex is scored; slot (item - M) holds its index and score.
   *  - otherwise: howmany = N * knn_sample_percent items are drawn with
   *    randi(M, M+N-1); slot i holds the i-th drawn item's index and score.
   *
   *  Slots that are never filled keep index -2 and score 0.
   *
   *  @param vertex   vertex being updated
   *  @param gcontext engine context (not used here)
   */
  void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {

  if (vertex.id() >= M)
    return;

  vertex_data & vdata = latent_factors_inmem[vertex.id()];
  int howmany = N*knn_sample_percent;
  assert(howmany > 0 );
  vec distances = vec::Zero(howmany);
  ivec indices = ivec(howmany);
  for (int i=0; i< howmany; i++){
    indices[i]= -2;
  }
  std::vector<bool> curratings;
  curratings.resize(N);
  for(int e=0; e < vertex.num_edges(); e++) {
  //no need to calculate this rating since it is given in the training data reference
    curratings[vertex.edge(e)->vertex_id() - M] = true;
  }
   if (knn_sample_percent == 1.0){
     for (uint i=M; i< M+N; i++){
        if (curratings[i-M])
          continue;
        vertex_data & other = latent_factors_inmem[i];
        double dist;
        als_predict(vdata, other, 0, dist); 
        indices[i-M] = i-M;
        distances[i-M] = dist;
     }
  }
  else for (int i=0; i<howmany; i++){
        int random_other = ::randi(M, M+N-1);
        vertex_data & other = latent_factors_inmem[random_other];
        double dist;
        als_predict(vdata, other, 0, dist); 
        // The sample slot is i (0 .. howmany-1) and the stored index is the
        // sampled item's offset. The previous code indexed with i-M, which is
        // outside the arrays for every i, and recorded i-M instead of the item.
        indices[i] = random_other - M;
        distances[i] = dist;
   }
  
  vec out_dist(num_ratings);
  ivec indices_sorted = reverse_sort_index2(distances, indices, out_dist, num_ratings);
  assert(indices_sorted.size() <= num_ratings);
  assert(out_dist.size() <= num_ratings);
  vdata.ids = indices_sorted;
  vdata.ratings = out_dist;
  if (debug)
    printf("Closest is: %d with distance %g\n", (int)vdata.ids[0], vdata.ratings[0]);

  // Progress message for every 1000th user
  if (vertex.id() % 1000 == 0)
    printf("Computing recommendaitons for user %d at time: %g\n", vertex.id()+1, mytimer.current_time());
  
  
  }
 /**
  *  Vertex update function.
  *
  *  Vertices already marked `confirmed` are skipped.
  *  Iteration 0: a vertex whose color equals its own id removes all its
  *  out-edges and propagates.
  *  Later iterations: the vertex looks through its non-deleted out-edges for
  *  a neighbor label equal to its own color. On a match it removes all its
  *  out-edges, stores SCCinfo(color, true) and propagates; otherwise it
  *  stores SCCinfo(own id, false).
  *  Propagating means: on every non-deleted in-edge, write this vertex's
  *  color into its side of the edge label and schedule the vertex at the
  *  other end.
  */
 void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
     if (vertex.get_data().confirmed)
         return;

     VertexDataType current = vertex.get_data();
     bool should_propagate = false;

     if (gcontext.iteration == 0) {
         /* "Leader" of the SCC */
         should_propagate = (current.color == vertex.id());
         if (should_propagate)
             vertex.remove_alloutedges();
     } else {
         /* Scan the out-edges for a neighbor label that matches my color */
         bool found = false;
         for(int k=0; k < vertex.num_outedges() && !found; k++) {
             if (vertex.outedge(k)->get_data().deleted())
                 continue;
             found = (vertex.outedge(k)->get_data().neighbor_label(vertex.id(), vertex.outedge(k)->vertexid) == current.color);
         }

         should_propagate = found;
         if (found) {
             vertex.remove_alloutedges();
             vertex.set_data(SCCinfo(current.color, true));
         } else {
             vertex.set_data(SCCinfo(vertex.id(), false));
         }
     }

     if (!should_propagate)
         return;

     for(int k=0; k < vertex.num_inedges(); k++) {
         bidirectional_label label = vertex.inedge(k)->get_data();
         if (label.deleted())
             continue;
         label.my_label(vertex.id(), vertex.inedge(k)->vertexid) = current.color;
         vertex.inedge(k)->set_data(label);
         gcontext.scheduler->add_task(vertex.inedge(k)->vertexid, true);
     }
 }
Example #12
0
  /**
   *  Vertex update function.
   *
   *  Computes one entry of r = (c*A*x [or c*x]) + d*y, optionally divided by
   *  another entry, for vertices with id in [mi.start, mi.end). All operands
   *  are slots of the vertex's pvec selected by the offsets stored in `mi`;
   *  a negative offset means the operand is absent. The result is written to
   *  pvec[mi.r_offset]; when mi.prev_offset is non-negative the old value of
   *  that slot is first copied to pvec[mi.prev_offset].
   */
  void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {

    const bool outside_range = vertex.id() < (uint)mi.start || vertex.id() >= (uint)mi.end;
    if (outside_range)
      return;

    vertex_data& self = latent_factors_inmem[vertex.id()];
    bool rows = vertex.id() < (uint)info.get_start_node(false);
    if (info.is_square()) 
      rows = mi.A_transpose;
    (void) rows; // unused
    assert(mi.r_offset >=0);

    // keep the previous result for convergence detection
    if (mi.prev_offset >= 0)
      self.pvec[mi.prev_offset ] = self.pvec[mi.r_offset];

    assert(mi.x_offset >=0 || mi.y_offset>=0);
    double acc = 0;

    if (mi.x_offset >= 0){
      if (mi.A_offset){
        /*** r = c*A*x ***/
        for(int k=0; k < vertex.num_edges(); k++) {
          const edge_data & entry = vertex.edge(k)->get_data();
          const vertex_data  & neighbor = latent_factors_inmem[vertex.edge(k)->vertex_id()];
          acc += (entry.weight * neighbor.pvec[mi.x_offset]);
        }

        // diagonal term, only for square matrices
        if  (info.is_square() && mi.use_diag)
          acc += ((self.A_ii+ regularization) * self.pvec[mi.x_offset]);

        acc *= mi.c;
      }
      else {
        /*** r = c*I*x ***/
        acc = mi.c*self.pvec[mi.x_offset];
      }
    }

    /*** r += d*y (optional) ***/
    if (mi.y_offset>= 0)
      acc += mi.d*self.pvec[mi.y_offset]; 

    /*** r = (...) / div (optional) ***/
    if (mi.div_offset >= 0)
      acc /= self.pvec[mi.div_offset];

    assert(mi.r_offset>=0 && mi.r_offset < self.pvec.size());
    self.pvec[mi.r_offset] = acc;
  } //end update
Example #13
0
  /**
   *  Vertex update function.
   *
   *  For every vertex with out-edges, walks over all its edges and performs
   *  one gradient step per edge: the neighbor's vector is updated first
   *  (using the current vector of this vertex), then this vertex's vector is
   *  updated using the already-updated neighbor vector. The value returned
   *  by sgd_predict is added to the calling thread's slot of rmse_vec.
   */
  void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
    // Vertices without out-edges are not processed.
    if (vertex.num_outedges() <= 0)
      return;

    vertex_data & self = latent_factors_inmem[vertex.id()]; 

    for(int k=0; k < vertex.num_edges(); k++) {
      float rating = vertex.edge(k)->get_data();                
      vertex_data & neighbor = latent_factors_inmem[vertex.edge(k)->vertex_id()];

      double estimate;
      rmse_vec[omp_get_thread_num()] += sgd_predict(self, neighbor, rating, estimate);

      double err = rating - estimate;
      const bool diverged = std::isnan(err) || std::isinf(err);
      if (diverged)
        logstream(LOG_FATAL)<<"SGD got into numerical error. Please tune step size using --sgd_gamma and sgd_lambda" << std::endl;

      // NOTE: the neighbor update below is not thread safe, because several
      // vertices may update the same neighbor vector concurrently. According
      // to the original author this did not matter for accuracy in practice
      // on a multicore machine. To protect it, define a global mutex, lock it
      // before the next statement and unlock it right after.
      neighbor.pvec += sgd_gamma*(err*self.pvec - sgd_lambda*neighbor.pvec);
      self.pvec += sgd_gamma*(err*neighbor.pvec - sgd_lambda*self.pvec);
    }
  }
Example #14
0
  /**
   *  Vertex update function - weighted least squares step.
   *
   *  Each edge contributes to Xty and to the upper triangle of XtX scaled by
   *  its `time` field (together with `weight` for Xty). lambda is added to
   *  the diagonal and the system is solved with Eigen's LDLT; the solution
   *  is stored in this vertex's pvec. For vertices with out-edges the values
   *  returned by wals_predict, multiplied by edge.time, are summed into the
   *  vertex's rmse field.
   */
  void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
    vertex_data & self = latent_factors_inmem[vertex.id()];
    self.rmse = 0;

    vec Xty = vec::Zero(NLATENT);
    mat XtX = mat::Zero(NLATENT, NLATENT); 
    const bool accumulate_rmse = (vertex.num_outedges() > 0);

    // Accumulate the normal equations
    for(int k=0; k < vertex.num_edges(); k++) {
      const edge_data & entry = vertex.edge(k)->get_data();                
      vertex_data & neighbor = latent_factors_inmem[vertex.edge(k)->vertex_id()];
      Map<vec> nbr_vec(neighbor.pvec, NLATENT);

      Xty += nbr_vec * entry.weight * entry.time;
      XtX.triangularView<Eigen::Upper>() += nbr_vec * nbr_vec.transpose() * entry.time;

      if (accumulate_rmse) {
        double prediction;
        self.rmse += wals_predict(self, neighbor, entry.weight, prediction) * entry.time;
      }
    }

    // Regularization on the diagonal
    for(int d=0; d < NLATENT; d++)
      XtX(d,d) += (lambda);

    // Only the upper triangle was filled, so solve through a self-adjoint view.
    Map<vec> solution(self.pvec, NLATENT);
    solution = XtX.selfadjointView<Eigen::Upper>().ldlt().solve(Xty);
  }
Example #15
0
    /**
     * Grab pivot's adjacency list into memory.
     *
     * Copies (neighbor id, up_weight) of every edge of v into a dense_adj,
     * records v's id in it, stores it in adjs[v.id() - pivot_st] and adds the
     * edge count to grabbed_edges.
     *
     * @param v vertex to load; asserted to be a pivot and a user
     * @return number of edges of v
     */
    int load_edges_into_memory(graphchi_vertex<uint32_t, edge_data> &v) {
      assert(is_pivot(v.id()));
      assert(is_user(v.id()));
      // Check the slot index before writing to it, not after.
      assert(v.id() - pivot_st < adjs.size());

      int num_edges = v.num_edges();

      dense_adj dadj;
      for(int i=0; i<num_edges; i++) 
        set_new( dadj.edges, v.edge(i)->vertex_id(), v.edge(i)->get_data().up_weight);
      //dadj.ratings = zeros(N);
      dadj.vid = v.id();
      adjs[v.id() - pivot_st] = dadj;
      __sync_add_and_fetch(&grabbed_edges, num_edges /*edges_to_larger_id*/);
      return num_edges;
    }
Example #16
0
  /**
   *  Compute the validation error contribution of a single vertex.
   *
   *  When `user_nodes` is set only vertices with id < M are processed,
   *  otherwise only vertices with id >= M. For each out-edge the prediction
   *  function pointed to by pprediction_func is called and its return value
   *  is added to the calling thread's slot of validation_rmse_vec.
   */
  void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
    // Skip vertices on the side that is not being evaluated.
    if ((user_nodes && vertex.id() >= M) || (!user_nodes && vertex.id() < M))
      return;

    vertex_data & self = latent_factors_inmem[vertex.id()];
    for(int k=0; k < vertex.num_outedges(); k++) {
      const EdgeDataType & rating = vertex.edge(k)->get_data();
      vertex_data & neighbor = latent_factors_inmem[vertex.edge(k)->vertex_id()];

      double prediction;
      double edge_error = (*pprediction_func)(self, neighbor, rating, prediction, NULL);
      // (A range check of the error against (maxval - minval)^2 is disabled here.)

      assert(validation_rmse_vec.size() > omp_get_thread_num());
      validation_rmse_vec[omp_get_thread_num()] += edge_error;
    }
  }
Example #17
0
  /**
   *  Vertex update function - multiplicative update step.
   *
   *  Iteration 0 only validates the input: a vertex with id < M and no
   *  out-edges, or any negative edge value, is reported through LOG_FATAL.
   *  In later iterations users (id < M) are updated when `iter` is odd and
   *  the other vertices when `iter` is even. The update multiplies every
   *  component of the vertex's pvec by ret[i] / px[i], where ret is the sum
   *  over the edges of neighbor.pvec * (observation / prediction) and px is
   *  the summed latent features of the opposite side; components are
   *  clamped from below to `epsilon`.
   */
  void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {

    if (gcontext.iteration == 0){
      // Validation pass only, no update
      if (vertex.num_outedges() == 0 && vertex.id() < M)
        logstream(LOG_FATAL)<<"NMF algorithm can not work when the row " << vertex.id() << " of the matrix contains all zeros" << std::endl;
      for(int k=0; k < vertex.num_edges(); k++) {
        float entry = vertex.edge(k)->get_data();                
        if (entry < 0 )
          logstream(LOG_FATAL)<<"Found a negative entry in matirx row " << vertex.id() << " with value: " << entry << std::endl;
      }
      return;   
    }

    bool isuser = (vertex.id() < M);
    // Alternate sides between passes: skip the side that is not due.
    const bool odd_pass = (iter % 2 == 1);
    const bool even_pass = (iter % 2 == 0);
    if ((odd_pass && !isuser) || (even_pass && isuser))
      return;

    vec numerator = zeros(D);
    vertex_data & self = latent_factors_inmem[vertex.id()];

    for(int k=0; k < vertex.num_edges(); k++) {
      float rating = vertex.edge(k)->get_data();                
      vertex_data & neighbor = latent_factors_inmem[vertex.edge(k)->vertex_id()];
      double prediction;
      rmse_vec[omp_get_thread_num()] += nmf_predict(self, neighbor, rating, prediction);
      if (prediction == 0)
        logstream(LOG_FATAL)<<"Got into numerical error! Please submit a bug report." << std::endl;
      numerator += neighbor.pvec * (rating / prediction);
    }

    // Denominator: summed latent features of the opposite side
    vec px;
    if (isuser)
      px = sum_of_item_latent_features;
    else 
      px = sum_of_user_latent_feautres;

    for (int d=0; d<D; d++){
      assert(px[d] != 0);
      self.pvec[d] *= numerator[d] / px[d];
      if (self.pvec[d] < epsilon)
        self.pvec[d] = epsilon;
    }
  }
    /**
     *  Vertex update function.
     *  On the first iteration each vertex takes its own id as label and
     *  schedules itself. On subsequent iterations each vertex computes the
     *  minimum of its own label and its neighbors' labels; if that minimum is
     *  smaller than the vertex's stored value, every neighbor whose label is
     *  larger than the minimum is scheduled and the minimum becomes the new
     *  label. Requires a scheduler.
     */
    void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext)
    {
        /* This program requires selective scheduling. */
        assert(gcontext.scheduler != NULL);

        if(gcontext.iteration == 0)
        {
            set_data(vertex, vertex.id());
            /* Schedule this vertex itself for the next round */
            gcontext.scheduler->add_task(vertex.id());
            return;
        }

        /* Find the minimum label among me and my neighbors */
        vid_t best = vertex_values[vertex.id()];
        for(int k=0; k < vertex.num_edges(); k++)
        {
            vid_t nblabel = neighbor_value(vertex.edge(k));
            best = std::min(nblabel, best);
        }

        if ( !(best < vertex.get_data()) )
        {
            return;
        }

        /* My label changes: schedule the neighbors that would improve */
        for(int k=0; k < vertex.num_edges(); k++)
        {
            if (best < neighbor_value(vertex.edge(k)))
            {
                gcontext.scheduler->add_task(vertex.edge(k)->vertex_id());
            }
        }
        set_data(vertex, best);
    }
 /**
  *  Vertex update function.
  *
  *  Iteration 0: writes this vertex's id on all in-edges.
  *  Later iterations: first checks that the stored vertex value equals the
  *  current number of in-edges, then checks the value on every non-deleted
  *  out-edge against the expected one, and finally writes
  *  (id + iteration) on every in-edge, removing an in-edge whenever
  *  std::rand() % 4 == 1 (counted in `ndeleted`).
  *  The vertex value becomes the number of in-edges that were kept, except
  *  on the last iteration, where it becomes iteration + 1.
  */
 void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
     int kept_inedges = 0;

     if (gcontext.iteration == 0) {
         for(int k=0; k < vertex.num_inedges(); k++) {
             vertex.inedge(k)->set_data(vertex.id());        
             kept_inedges++;
         }
     } else {
         // The vertex value holds the in-edge count left by the previous
         // iteration; comparing it makes sure that deletion works fine.
         if (vertex.get_data() != vertex.num_inedges())  {
             logstream(LOG_ERROR) << "Discrepancy in edge counts: " << vertex.get_data() << " != " << vertex.num_inedges() << std::endl;
         }
         assert(vertex.get_data() == vertex.num_inedges());

         // Verify the values the neighbors wrote on my out-edges
         for(int k=0; k < vertex.num_outedges(); k++) {
             graphchi_edge<vid_t> * out_edge = vertex.outedge(k);
             vid_t stored = out_edge->get_data();
             vid_t expected = out_edge->vertex_id() + gcontext.iteration - (out_edge->vertex_id() > vertex.id());
             if (!is_deleted_edge_value(out_edge->get_data()) && stored != expected) {
                 logstream(LOG_ERROR) << stored << " != " << expected << std::endl;
                 assert(false);
             }
         }

         // Write the new value and randomly delete some of the in-edges
         for(int k=0; k < vertex.num_inedges(); k++) {
             vertex.inedge(k)->set_data(vertex.id() + gcontext.iteration);

             const bool drop = (std::rand()  % 4 == 1);
             if (drop) {
                 vertex.remove_inedge(k);
                 __sync_add_and_fetch(&ndeleted, 1);
             } else {
                 kept_inedges++;
             }
         }
     }

     if (gcontext.iteration != gcontext.num_iterations - 1) {
         vertex.set_data(kept_inedges);
     } else {
         vertex.set_data(gcontext.iteration + 1);
     }
 }
Example #20
0
	/**
	 *  Vertex update function. Splits the out-edges of the vertex over two
	 *  output files: an edge whose label reports is_equal() and whose label
	 *  on this vertex's side equals `root` is written to fpout, every other
	 *  edge is written to fpout1. Each line is "<source>\t<target>" and is
	 *  written while holding `lock`.
	 */
	void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {

		assert(vertex.num_inedges() * vertex.num_outedges() <= product);

		for(int k=0; k<vertex.num_outedges(); k++){
			bidirectional_label label = vertex.outedge(k)->get_data();
			const bool in_root_component =
				label.is_equal() && root == label.my_label(vertex.id(), vertex.outedge(k)->vertexid);

			if(in_root_component){
				lock.lock();
				fprintf(fpout, "%u\t%u\n", vertex.id(), vertex.outedge(k)->vertexid);
				lock.unlock();
			}else{
				lock.lock();
				fprintf(fpout1, "%u\t%u\n", vertex.id(), vertex.outedge(k)->vertexid);
				lock.unlock();
			}
		}
	}
    /**
     * Vertex update function that relabels vertices and dumps the edge list.
     *
     * Vertices without edges are ignored.
     * Iteration 0: the vertex obtains a new id from getNewId(), stores it as
     * its value and writes it into its own side of the label of every edge.
     * Iteration 1: for every out-edge a line "<my label>\t<neighbor label>"
     * is written to fp_list while holding `lock`; when flag_weight is set
     * the edge weight is appended with three decimals.
     */
	void update(graphchi_vertex<VType, EType> &v, graphchi_context &ginfo) {
		if(v.num_edges() == 0)
			return;

		if (ginfo.iteration == 0) {
			vid_t newid = getNewId(v.id());
			v.set_data(newid);
			// Publish the new id on my side of every edge label
			for(int k=0; k<v.num_edges(); k++){
				graphchi_edge<EType> * e = v.edge(k);
				EType label = e->get_data();
				label.my_label(v.id(), e->vertex_id()) = newid;
				e->set_data(label);
			}
			return;
		}

		if(ginfo.iteration != 1)
			return;
		if(v.num_outedges() <= 0)
			return;

		vid_t mylabel = v.get_data();
		for(int k=0; k<v.num_outedges(); k++){
			graphchi_edge<EType> * e = v.outedge(k);
			EType label = e->get_data();
			vid_t nblabel = label.nb_label(v.id(), e->vertex_id());
			assert(mylabel != nblabel);

			lock.lock();
			if(flag_weight){
				fprintf(fp_list, "%u\t%u\t%.3f\n", mylabel, nblabel, label.weight);
			}else{
				fprintf(fp_list, "%u\t%u\n", mylabel, nblabel);
			}
			lock.unlock();
		}
	}
Example #22
0
  /**
   *  Vertex update function.
   *
   *  GLOBAL_MEAN / USER_MEAN: handled by vertices with out-edges. For
   *  USER_MEAN the edge values are first summed into the vertex's
   *  mean_rating and divided by the edge count. Then baseline_predict is
   *  called per edge with (this vertex, neighbor).
   *  ITEM_MEAN: handled by vertices with in-edges. The edge values are
   *  summed into the vertex's mean_rating and divided by the edge count,
   *  then baseline_predict is called per edge with (neighbor, this vertex).
   *  In both cases the return value of baseline_predict is added to the
   *  calling thread's slot of rmse_vec.
   */
  void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
    const bool user_side = vertex.num_outedges() > 0 && (algo == GLOBAL_MEAN || algo == USER_MEAN);

    if (user_side){
      vertex_data & self = latent_factors_inmem[vertex.id()]; 

      if (algo == USER_MEAN){
        // average of all ratings on this vertex
        for(int k=0; k < vertex.num_edges(); k++) {
          float rating = vertex.edge(k)->get_data();                
          self.mean_rating += rating;
        }
        if (vertex.num_edges() > 0)
          self.mean_rating /= vertex.num_edges();
      }

      // accumulate the prediction error of every rating
      for(int k=0; k < vertex.num_edges(); k++) {
        double prediction;
        float rating = vertex.edge(k)->get_data();                
        vertex_data & neighbor = latent_factors_inmem[vertex.edge(k)->vertex_id()];
        rmse_vec[omp_get_thread_num()] += baseline_predict(self, neighbor, rating, prediction);
      }
    }
    else if (vertex.num_inedges() > 0 && algo == ITEM_MEAN){
      vertex_data & self = latent_factors_inmem[vertex.id()]; 

      // average of all ratings on this vertex
      for(int k=0; k < vertex.num_edges(); k++) {
        float rating = vertex.edge(k)->get_data();                
        self.mean_rating += rating;
      } 
      if (vertex.num_edges() > 0)
        self.mean_rating /= vertex.num_edges();

      // accumulate the prediction error; the neighbor goes first here
      for(int k=0; k < vertex.num_edges(); k++) {
        float rating = vertex.edge(k)->get_data();                
        double prediction;
        vertex_data & neighbor = latent_factors_inmem[vertex.edge(k)->vertex_id()];
        rmse_vec[omp_get_thread_num()] += baseline_predict(neighbor, self, rating, prediction);
      }
    }
  }
Example #23
0
  /**
   *  Vertex update function.
   */
  void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
      // One gradient pass over the ratings of a single vertex. Only vertices that have
      // out-edges are processed; every other vertex is left untouched.
      if ( vertex.num_outedges() > 0){
        vertex_data & user = latent_factors_inmem[vertex.id()]; 

        // Rebuild user.weight from scratch as the sum of the neighbors' weight vectors.
        // NOTE(review): the memset assumes weight stores D contiguous doubles -- confirm against vertex_data.
        memset(&user.weight[0], 0, sizeof(double)*D);
        for(int e=0; e < vertex.num_outedges(); e++) {
          vertex_data & movie = latent_factors_inmem[vertex.edge(e)->vertex_id()]; 
          user.weight += movie.weight;

        }
        // 1/sqrt(|N(u)|)  (computed in double, then narrowed to float)
        float usrNorm = double(1.0/sqrt(vertex.num_outedges()));
        // 1/sqrt(|N(u)|) * sum_j y_j
        user.weight *= usrNorm;

        // Accumulates err * q_i over all ratings; applied to the neighbors' weights after the loop.
        vec step = zeros(D);

        // main algorithm, see Koren's paper, just below equation (16)
        for(int e=0; e < vertex.num_outedges(); e++) {
          vertex_data & movie = latent_factors_inmem[vertex.edge(e)->vertex_id()]; 
          float observation = vertex.edge(e)->get_data();                
          double estScore;
          // The return value of svdpp_predict is added to this thread's rmse_vec slot;
          // estScore is presumably filled in by the call (it is read right after) -- confirm.
          rmse_vec[omp_get_thread_num()] += svdpp_predict(user, movie,observation, estScore); 
          // e_ui = r_ui - \hat{r_ui}
          float err = observation - estScore;
          assert(!std::isnan(rmse_vec[omp_get_thread_num()]));
          // Snapshot both factor vectors so the two updates below use the pre-update values.
          vec itmFctr = movie.pvec;
          vec usrFctr = user.pvec;

          //q_i = q_i + gamma2     *(e_ui*(p_u      +  sqrt(N(U))\sum_j y_j) - gamma7    *q_i)
          for (int j=0; j< D; j++)
            movie.pvec[j] += svdpp.itmFctrStep*(err*(usrFctr[j] +  user.weight[j])             - svdpp.itmFctrReg*itmFctr[j]);
          //p_u = p_u + gamma2    *(e_ui*q_i   -gamma7     *p_u)
          for (int j=0; j< D; j++)
            user.pvec[j] += svdpp.usrFctrStep*(err *itmFctr[j] - svdpp.usrFctrReg*usrFctr[j]);
          step += err*itmFctr;

          //b_i = b_i + gamma1*(e_ui - gamma6 * b_i) 
          movie.bias += svdpp.itmBiasStep*(err-svdpp.itmBiasReg* movie.bias);
          //b_u = b_u + gamma1*(e_ui - gamma6 * b_u)
          user.bias += svdpp.usrBiasStep*(err-svdpp.usrBiasReg* user.bias);
        }

        // Scale the accumulated step by the weight step size and the 1/sqrt(|N(u)|) norm.
        step *= float(svdpp.itmFctr2Step*usrNorm);
        //gamma7 
        double mult = svdpp.itmFctr2Step*svdpp.itmFctr2Reg;
        // NOTE(review): this loop is bounded by num_edges() while the loops above use
        // num_outedges(); the two agree only if this vertex has no in-edges -- confirm.
        for(int e=0; e < vertex.num_edges(); e++) {
          vertex_data&  movie = latent_factors_inmem[vertex.edge(e)->vertex_id()];
          //y_j = y_j  +   gamma2*sqrt|N(u)| * q_i - gamma7 * y_j
          movie.weight +=  step                    -  mult  * movie.weight;
        }
      }
  }
Example #24
0
 /** 
   * Compute size of the relevant intersection of v and a pivot
   */
 int intersection_size(graphchi_vertex<uint32_t, uint32_t> &v, vid_t pivot, int start_i) {
     // Counts the neighbors of v located after position start_i that also appear in the
     // pivot's in-memory adjacency list, and adds one to the data of each matching edge of v.
     // Returns the number of matches; pivots whose id does not exceed v's id yield 0.
     assert(is_pivot(pivot));
     if (!(pivot > v.id())) {
         return 0;
     }

     dense_adj &pivot_adj = adjs[pivot - pivot_st];
     const int degree = v.num_edges();
     int matches = 0;

     // TODO: do real profiling to find best cutoff value
     if (pivot_adj.count < 32 * (degree - start_i)) {
         // The two lists are of comparable size: walk both in lockstep (merge-style).
         assert(v.edge(start_i)->vertex_id() == pivot);
         int pivot_pos = 0;
         int edge_pos = start_i + 1;

         while (pivot_pos < pivot_adj.count && edge_pos < degree) {
             const vid_t mine = v.edge(edge_pos)->vertexid;
             const vid_t theirs = pivot_adj.adjlist[pivot_pos];
             if (theirs < mine) {
                 pivot_pos++;
             } else if (theirs > mine) {
                 edge_pos++;
             } else {
                 /* Common neighbor: add one to the edge between v and the match */
                 v.edge(edge_pos)->set_data(v.edge(edge_pos)->get_data() + 1);
                 matches++;
                 pivot_pos++;
                 edge_pos++;
             }
         }
     } else {
         // Otherwise look each candidate neighbor up in the pivot's list with findadj,
         // skipping ids not above the pivot and immediate repeats of the previous id.
         vid_t previous = 0;
         for (int edge_pos = start_i + 1; edge_pos < degree; edge_pos++) {
             const vid_t candidate = v.edge(edge_pos)->vertexid;
             if (candidate > pivot && candidate != previous) {
                 const int hit = findadj(pivot_adj.adjlist, pivot_adj.count, candidate);
                 matches += hit;
                 if (hit > 0) {
                     /* Add one to edge between v and the match */
                     v.edge(edge_pos)->set_data(v.edge(edge_pos)->get_data() + 1);
                 }
             }
             previous = candidate;
         }
     }
     return matches;
 }
Example #25
0
	/**
	 *  Vertex update function.
	 */
	void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
		// Vertices without out-edges are not scored.
		if (!(vertex.num_outedges() > 0))
			return;

		assert(vertex.id() < Me);
		assert(validation_targets[vertex.id()] == -1 || validation_targets[vertex.id()] == 1);

		// Add up mu_ij over every neighbor reached through the first num_outedges() edges.
		double total = 0;
		for (int idx = 0; idx < vertex.num_outedges(); idx++) {
			uint neighbor = vertex.edge(idx)->vertex_id();
			assert(neighbor >= M);
			total += mu_ij[neighbor];
		}

		// p0 / p1 are only reported in the debug trace below.
		double p0 = phi(-1 * total / sqrt(beta));
		double p1 = phi(1 * total / sqrt(beta));

		// The raw sum is stored as the prediction; its sign is the predicted class.
		latent_factors_inmem[vertex.id()].predict = total;
		double predicted_label;
		if (total > 0)
			predicted_label = 1;
		else
			predicted_label = -1;

		// Count a per-thread error whenever the predicted class differs from the target.
		if (predicted_label != validation_targets[vertex.id()])
			err_vec[omp_get_thread_num()]++;

		if (debug)
			std::cout<<"node: " << vertex.id() << " sum is: " << total << " p0: " << p0 << " p1: " << p1 << " target: " << validation_targets[vertex.id()] << std::endl;
	}
  /**
   *  Vertex update function - blends this vertex's probability vector with the normalized, weighted sum of its neighbors' vectors
   */
  void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
    vertex_data & self = latent_factors_inmem[vertex.id()];
    if (debug)
      logstream(LOG_DEBUG)<<"Entering node: " << vertex.id() << " seed? " << self.seed << " in vector: " << self.pvec << std::endl;

    // Seed nodes are never modified.
    if (self.seed)
      return;
    // Nothing to aggregate without out-edges.
    if (vertex.num_outedges() == 0)
      return;

    // Weighted sum of the neighbors' vectors.
    vec aggregated = zeros(D);
    for (int idx = 0; idx < vertex.num_outedges(); idx++) {
      float edge_weight = vertex.edge(idx)->get_data();
      assert(edge_weight != 0);
      vertex_data & neighbor = latent_factors_inmem[vertex.edge(idx)->vertex_id()];
      aggregated += edge_weight * neighbor.pvec;
    }

    // Scale the aggregate so its entries sum to one.
    assert(sum(aggregated) != 0);
    aggregated = aggregated / sum(aggregated);

    // Mix the old vector (weight alpha) with the aggregate (weight 1-alpha), then renormalize.
    self.pvec = alpha * self.pvec + (1-alpha)*aggregated;
    self.pvec/= sum(self.pvec);
  }
Example #27
0
 void update(graphchi_vertex<VertexDataType, EdgeDataType>& v, graphchi_context& ginfo)
 {
     // pr[] holds each vertex's rank already divided by its out-degree (when it has one),
     // so an in-neighbor's contribution is read directly from pr[].
     const bool has_out = v.outc > 0;

     if (ginfo.iteration > 0) {
         // Gather the contributions of all in-neighbors.
         float incoming = 0;
         for (int k = 0; k < v.num_inedges(); k++) {
             incoming += pr[v.inedge(k)->vertexid];
         }
         if (has_out) {
             pr[v.id()] = (RANDOMRESETPROB + (1 - RANDOMRESETPROB) * incoming) / v.outc;
         } else {
             pr[v.id()] = (RANDOMRESETPROB + (1 - RANDOMRESETPROB) * incoming);
         }
     } else if (ginfo.iteration == 0 && has_out) {
         // First iteration: only vertices with out-edges are initialized here.
         pr[v.id()] = 1.0f / v.outc;
     }

     if (ginfo.iteration == ginfo.num_iterations - 1) {
         /* On last iteration, multiply pr by degree and store the result */
         if (has_out) {
             v.set_data(pr[v.id()] * v.outc);
         } else {
             v.set_data(pr[v.id()]);
         }
     }
 }
    /**
     *  Vertex update function.
     */
    void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
        // Reset the vertex state while the first_iteration flag is set.
        if (first_iteration) {
            vertex.set_data(SCCinfo(vertex.id()));
        }

        // Confirmed vertices take no further part.
        if (vertex.get_data().confirmed) {
            return;
        }

        /* Vertices with only in or out edges cannot be part of a SCC (Trimming) */
        const bool trimmable = (vertex.num_inedges() == 0 || vertex.num_outedges() == 0);
        if (trimmable) {
            if (vertex.num_edges() > 0) {
                // TODO: check this logic!
                vertex.set_data(SCCinfo(vertex.id()));
            }
            vertex.remove_alledges();
            return;
        }
        remainingvertices = true;

        VertexDataType state = vertex.get_data();
        bool color_changed;
        if (gcontext.iteration == 0) {
            // Start from the vertex's own id and always propagate.
            state = vertex.id();
            color_changed = true;
            /* Clean up in-edges. This would be nicer in the messaging abstraction... */
            for (int k = 0; k < vertex.num_inedges(); k++) {
                bidirectional_label label = vertex.inedge(k)->get_data();
                label.my_label(vertex.id(), vertex.inedge(k)->vertexid) = vertex.id();
                vertex.inedge(k)->set_data(label);
            }
        } else {
            /* Loop over in-edges and choose minimum color */
            vid_t smallest = state.color;
            for (int k = 0; k < vertex.num_inedges(); k++) {
                const vid_t candidate = vertex.inedge(k)->get_data().neighbor_label(vertex.id(), vertex.inedge(k)->vertexid);
                if (candidate < smallest) {
                    smallest = candidate;
                }
            }

            color_changed = (smallest != state.color);
            if (color_changed) {
                state.color = smallest;
            }
        }
        vertex.set_data(state);

        if (!color_changed) {
            return;
        }

        // Write the new color on every out-edge and schedule the vertex at its other end.
        for (int k = 0; k < vertex.num_outedges(); k++) {
            bidirectional_label label = vertex.outedge(k)->get_data();
            label.my_label(vertex.id(), vertex.outedge(k)->vertexid) = state.color;
            vertex.outedge(k)->set_data(label);
            gcontext.scheduler->add_task(vertex.outedge(k)->vertexid, true);
        }
    }
Example #29
0
  /**
   *  Vertex update function.
   *  On the first iteration, each vertex chooses a label = the vertex id.
   *  On subsequent iterations, each vertex chooses the minimum of its neighbors'
   *  labels (and its own).
   */
  void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {

    // Both modes start from this vertex's current label.
    vid_t smallest = vertex_values[vertex.id()];

    if (edge_count){
      // Edge-counting mode: for every edge whose other endpoint has a larger id, increment
      // state[] at the running minimum label seen so far.
      for (int k = 0; k < vertex.num_edges(); k++) {
        const vid_t label = neighbor_value(vertex.edge(k));
        if (label < smallest)
          smallest = label;
        if (vertex.edge(k)->vertex_id() > vertex.id()){
          mymutex.lock();
          state[smallest]++;
          mymutex.unlock();
        }
      }
      return;
    }

    // Label-propagation mode.
    // first time, count the number of nodes which actually have edges
    if (gcontext.iteration == 0 && vertex.num_edges() > 0){
      mymutex.lock();
      actual_vertices++;
      mymutex.unlock();
    }

    // Find the minimum over this vertex's label and all neighbor labels.
    for (int k = 0; k < vertex.num_edges(); k++) {
      const vid_t label = neighbor_value(vertex.edge(k));
      if (label < smallest)
        smallest = label;
    }

    if (!(vertex_values[vertex.id()] > smallest)) {
      // No improvement: this vertex becomes inactive.
      active_nodes[vertex.id()] = false;
      return;
    }

    // in case of a new min reschedule neighbors
    changes++;
    set_data(vertex, smallest);
    for (int k = 0; k < vertex.num_edges(); k++){
      active_nodes[vertex.edge(k)->vertex_id()] = true;
    }
  }
Example #30
0
  /**
   *  compute validation AP for a single user
   */
  void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {

    // With user_nodes set only ids below M are processed; otherwise only ids at or above M.
    if (user_nodes ? (vertex.id() >= M) : (vertex.id() < M))
      return;

    vertex_data & self = latent_factors_inmem[vertex.id()];
    vec predicted = zeros(vertex.num_outedges());
    vec actual = zeros(vertex.num_outedges());
    if (!(predicted.size() > 0))
      return;

    users_vec[omp_get_thread_num()]++;

    // Predict every rating and remember the observed value next to it.
    int positives = 0;
    for (int e = 0; e < vertex.num_outedges(); e++) {
      const EdgeDataType & observation = vertex.edge(e)->get_data();
      vertex_data & other = latent_factors_inmem[vertex.edge(e)->vertex_id()];
      double prediction;
      (*pprediction_func)(self, other, observation, prediction, NULL);
      predicted[e] = prediction;
      actual[e] = observation;
      if (observation > 0)
        positives++;
    }

    // Visit up to ap_number entries of sort_index(predicted), starting from its last position,
    // and add hits/rank for each visited entry whose observed value is positive.
    ivec order = sort_index(predicted);
    const int cutoff = std::min(ap_number, (int)predicted.size());
    int hits = 0;
    double ap = 0;
    for (int rank = 0; rank < cutoff; rank++){
      if (actual[order[predicted.size() - rank - 1]] > 0){
        ++hits;
        ap += (hits * 1.0/(rank+1));
      }
    }

    // Normalize by the number of positive observations (0 when there are none).
    if (positives > 0)
      ap /= positives;
    else
      ap = 0;
    sum_ap_vec[omp_get_thread_num()] += ap;
  }