Example #1
    /**
      * Pagerank update function.
      */
    void update(graphchi_vertex<VertexDataType, EdgeDataType> &v, graphchi_context &ginfo) {
        float sum=0;
        if (ginfo.iteration == 0) {
            /* On first iteration, initialize vertex and out-edges. 
               The initialization is important,
               because on every run, GraphChi will modify the data in the edges on disk. 
             */
            update_edge_data(v, 1.0);
            v.set_data(RANDOMRESETPROB); 
        } else {
            /* Compute the sum of neighbors' weighted pageranks by
               reading from the in-edges. */
            for(int i=0; i < v.num_inedges(); i++) {
                //float val = v.inedge(i)->get_data();
                //sum += val;                    
                struct weightE eData = v.inedge(i)->get_data();
                sum += eData.pagerank;
            }
            
            /* Compute my pagerank */
            float pagerank = RANDOMRESETPROB + (1 - RANDOMRESETPROB) * sum;
            
            /* Write my pagerank divided by the number of out-edges to
               each of my out-edges. */
            update_edge_data(v, pagerank);
                
            /* Keep track of the progression of the computation.
               GraphChi engine writes a file filename.deltalog. */
            ginfo.log_change(std::abs(pagerank - v.get_data()));
            
            /* Set my new pagerank as the vertex value */
            v.set_data(pagerank); 
        }
    }
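
    The update_edge_data() helper called above is not shown in this example. Judging from the
    comment "Write my pagerank divided by the number of out-edges to each of my out-edges", a
    minimal sketch could look like this (a hypothetical reconstruction, not the original helper;
    it assumes EdgeDataType is the weightE struct read above and that pagerank is its only field
    used by this program):

    /* Hypothetical helper: spread `value` evenly over the vertex's out-edges. */
    void update_edge_data(graphchi_vertex<VertexDataType, EdgeDataType> &v, float value) {
        if (v.num_outedges() == 0) return;
        float share = value / v.num_outedges();        // each out-edge gets an equal share
        for(int i=0; i < v.num_outedges(); i++) {
            weightE eData = v.outedge(i)->get_data();  // read the existing edge struct
            eData.pagerank = share;                    // overwrite only the pagerank field
            v.outedge(i)->set_data(eData);             // write it back to the edge
        }
    }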
 /**
  *  Vertex update function.
  */
 void update(graphchi_vertex<VertexDataType, EdgeDataType > &vertex, graphchi_context &gcontext) {
     if (gcontext.iteration == 0) {
         for(int i=0; i < vertex.num_outedges(); i++) {
             chivector<vid_t> * evector = vertex.outedge(i)->get_vector();
             evector->clear();
             assert(evector->size() == 0);
             
             evector->add(vertex.id());
             assert(evector->size() == 1);
             assert(evector->get(0) == vertex.id());
         }
         
     } else {
         for(int i=0; i < vertex.num_inedges(); i++) {
             graphchi_edge<EdgeDataType> * edge = vertex.inedge(i);
             chivector<vid_t> * evector = edge->get_vector();
             assert(evector->size() >= gcontext.iteration);
             for(int j=0; j < evector->size(); j++) {
                 vid_t expected = edge->vertex_id() + j;
                 vid_t has = evector->get(j);
                 if (has != expected) {
                     std::cout << "Mismatch: " << has << " != " << expected << std::endl;
                 }
                 assert(has == expected);
             }
         }
         for(int i=0; i < vertex.num_outedges(); i++) {
             vertex.outedge(i)->get_vector()->add(vertex.id() + gcontext.iteration);
         }
     }
     vertex.set_data(gcontext.iteration + 1);
 }
Example #3
  /**
   *  Vertex update function.
   *  On the first iteration, each vertex chooses a label = the vertex id.
   *  On subsequent iterations, each vertex chooses the minimum of its neighbors'
   *  labels (and its own).
   */
  void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {

    /* On subsequent iterations, find the minimum label of my neighbors */
    if (!edge_count){
      vid_t curmin = vertex_values[vertex.id()];
      if (gcontext.iteration == 0 && vertex.num_edges() > 0){
        mymutex.lock(); actual_vertices++; mymutex.unlock();
      }
      for(int i=0; i < vertex.num_edges(); i++) {
        vid_t nblabel = neighbor_value(vertex.edge(i));
        curmin = std::min(nblabel, curmin);
      }

      if (vertex_values[vertex.id()] > curmin) {
        changes++;
        set_data(vertex, curmin);
      }
    }
    else {
      vid_t curmin = vertex_values[vertex.id()];
      for(int i=0; i < vertex.num_edges(); i++) {
        vid_t nblabel = neighbor_value(vertex.edge(i));
        curmin = std::min(nblabel, curmin);
        if (vertex.edge(i)->vertex_id() > vertex.id()){
          mymutex.lock();
          state[curmin]++;
          mymutex.unlock();
        }
      }
    }
  }
Example #4
  /**
   *  Vertex update function.
   */
  void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
    //go over all user nodes
    if ( vertex.num_outedges() > 0){
      vertex_data & user = latent_factors_inmem[vertex.id()]; 
      //go over all ratings
      for(int e=0; e < vertex.num_edges(); e++) {
        float observation = vertex.edge(e)->get_data();                
        vertex_data & movie = latent_factors_inmem[vertex.edge(e)->vertex_id()];
        double estScore;
        rmse_vec[omp_get_thread_num()] += sgd_predict(user, movie, observation, estScore);
        double err = observation - estScore;
        if (std::isnan(err) || std::isinf(err))
          logstream(LOG_FATAL)<<"SGD got into numerical error. Please tune step size using --sgd_gamma and sgd_lambda" << std::endl;
        //NOTE: the following code is not thread safe, since several user nodes may
        //update this item's gradient vector concurrently. In practice, however, this
        //did not noticeably affect accuracy on a multicore machine.
        //If you would like to guard the code, you can define a global variable
        //mutex mymutex;
        //
        //and then do: mymutex.lock()
        movie.pvec += sgd_gamma*(err*user.pvec - sgd_lambda*movie.pvec);
        //and here add: mymutex.unlock();
        user.pvec += sgd_gamma*(err*movie.pvec - sgd_lambda*user.pvec);
      }
    }

  }
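
  If the guard described in the comments above is wanted, a minimal sketch would wrap the two
  gradient steps in a lock (hypothetical, not part of the original code; it assumes <mutex> is
  included and a file-scope std::mutex mymutex is defined):

        // Hypothetical thread-safe variant of the two updates above.
        // Assumes at file scope:  #include <mutex>  and  std::mutex mymutex;
        {
            std::lock_guard<std::mutex> guard(mymutex); // serializes concurrent writes to movie.pvec
            movie.pvec += sgd_gamma*(err*user.pvec - sgd_lambda*movie.pvec);
            user.pvec  += sgd_gamma*(err*movie.pvec - sgd_lambda*user.pvec);
        }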
Example #5
	void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
		if(gcontext.iteration == 0){
			if(vertex.num_edges() == 0)	 return;
			VertexDataType vertexdata = vertex.get_data();
			if(!vertexdata.confirmed || !vertexdata.reconfirmed)
				return ;	
			//assert(vertex.num_inedges() * vertex.num_outedges() <= product);
			int ct = 0;		
			for(int i=0; i<vertex.num_edges(); i++){
				graphchi_edge<EdgeDataType>* edge = vertex.edge(i);
				bidirectional_label edgedata = edge->get_data();
				if(edgedata.is_equal()){		
					/*
					if(edgedata.smaller_one != 0)
						std::cout<<edgedata.smaller_one<<" \t"<<edgedata.larger_one<<"\t root="<<root<<std::endl;
					*/
					if(root == edgedata.my_label(vertex.id(), edge->vertexid)){
						ct++;	
					}
				}
				/*
				   lock.lock();
				   fprintf(fpout1, "%u\t%u\n", vertex.id(), vertex.outedge(i)->vertexid);
				   lock.unlock();
				   */
			}
			assert(ct > 1);
		}
	}
Example #6
  /**
   *  Vertex update function.
   */
  void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
    vertex_data & vdata = latent_factors_inmem[vertex.id()];
    vdata.rmse = 0;
    mat XtX = mat::Zero(NLATENT, NLATENT); 
    vec Xty = vec::Zero(NLATENT);

    bool compute_rmse = (vertex.num_outedges() > 0);
    // Compute XtX and Xty (NOTE: unweighted)
    for(int e=0; e < vertex.num_edges(); e++) {
      const edge_data & edge = vertex.edge(e)->get_data();                
      vertex_data & nbr_latent = latent_factors_inmem[vertex.edge(e)->vertex_id()];
      Map<vec> X(nbr_latent.pvec, NLATENT);
      Xty += X * edge.weight * edge.time;
      XtX.triangularView<Eigen::Upper>() += X * X.transpose() * edge.time;
      if (compute_rmse) {
        double prediction;
        vdata.rmse += wals_predict(vdata, nbr_latent, edge.weight, prediction) * edge.time;
      }
    }
    // Diagonal
    for(int i=0; i < NLATENT; i++) XtX(i,i) += (lambda); // * vertex.num_edges();
    // Solve the least squares problem with eigen using Cholesky decomposition
    Map<vec> vdata_vec(vdata.pvec, NLATENT);
    vdata_vec = XtX.selfadjointView<Eigen::Upper>().ldlt().solve(Xty);
  }
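
  In matrix form, the system assembled and solved above is a time-weighted ridge step (a
  restatement of the code, with x_e the neighbor's latent factor, w_e = edge.weight and
  t_e = edge.time):

    p_v \leftarrow \Bigl(\sum_e t_e\, x_e x_e^{\top} + \lambda I\Bigr)^{-1} \sum_e t_e\, w_e\, x_e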
Example #7
    /**
     *  Vertex update function.
     */
    void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
        //go over all samples (rows)
        if (vertex.num_outedges() > 0){

            assert(vertex.id() < M);
            vertex_data & row = latent_factors_inmem[vertex.id()];
            assert(row.y == -1 || row.y == 1);

            if (debug)
                std::cout<<"Entered item " << vertex.id() << " y: " << row.y << std::endl;
            row.sigma = beta*beta;
            row.xT_mu = 0;

            //go over all features
            for(int e=0; e < vertex.num_outedges(); e++) {
                uint feature_id = vertex.edge(e)->vertex_id();
                edge_data edge = vertex.edge(e)->get_data();

                assert(sigma_ij[feature_id] > 0);
                assert(edge.x_ij == 1);

                /* compute equation (6) */
                row.sigma += edge.x_ij * sigma_ij[feature_id];
                /* compute the sum xT*w as needed in equations (7) and (8) */
                row.xT_mu += edge.x_ij * mu_ij[feature_id];
            }
            double prediction;
            double ret = ctr_predict(row, row, row.y, prediction);
            double predicted_target = prediction < 0 ? -1 : 1;
            if ((predicted_target == -1 && row.y == 1) || (predicted_target == 1 && row.y == -1))
                err_vec[omp_get_thread_num()] += 1.0;
            if (debug)
                std::cout<<"Prediction was: " << prediction << " real value: " << row.y << std::endl;
            liklihood_vec[omp_get_thread_num()] += ret;

            assert(row.sigma > 0);

            //go over all features
            for(int e=0; e < vertex.num_outedges(); e++) {
                edge_data edge = vertex.edge(e)->get_data();
                uint feature_id = vertex.edge(e)->vertex_id();
                assert(row.sigma > 0);
                double product = row.y * row.xT_mu / sqrt(row.sigma);
                mu_ij[feature_id] += (row.y * edge.x_ij * sigma_ij[feature_id] / sqrt(row.sigma)) * v(product);
                //if (debug)
                //    std::cout<<"Added to edge: "<< vertex.edge(e)->vertex_id() << " product: " << product << " v(product): " << v(product) << " value: " << (row.y * edge.x_ij * edge.sigma_ij * edge.sigma_ij / sqrt(row.sigma)) * v(product) << std::endl;
                double factor = 1.0 - (edge.x_ij * sigma_ij[feature_id] / row.sigma)*w(product);
                //if (debug)
                //    std::cout<<"Added to edge: "<< vertex.edge(e)->vertex_id() << " product: " << product << " w(product): " << w(product) << " factor: " << (1.0 - (edge.x_ij * edge.sigma_ij / row.sigma)*w(product)) << " sigma_ij " << edge.sigma_ij << " product: " << edge.sigma_ij * factor << std::endl;

                assert(factor > 0);
                sigma_ij[feature_id] *= factor;
                assert(sigma_ij[feature_id] > 0);
            }

        }
    }
 // Helper
 virtual void set_latent_factor(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, latentvec_t &fact) {
     vertex.set_data(fact);
     for(int i=0; i < vertex.num_edges(); i++) {
         als_factor_and_weight factwght = vertex.edge(i)->get_data();
         factwght.factor = fact;
         vertex.edge(i)->set_data(factwght);   // Note that neighbors override the values they have written to edges.
                                               // This is ok, because vertices are always executed in same order.
     }
 }
 void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
     
     // Loop over only in-edges and output them. This way deleted edges won't be included.
     for(int i=0; i < vertex.num_inedges(); i++) {
         graphchi_edge<EdgeDataType> * e = vertex.inedge(i);
         ((sharded_graph_output<VertexDataType, EdgeDataType> *)gengine->output(CONTRACTED_GRAPH_OUTPUT))->output_edgeval(e->vertex_id(), vertex.id(),
                                                                  e->get_data());
     }
 }
Example #10
 /** 
   * Compute size of the relevant intersection of v and a pivot
   */
 int intersection_size(graphchi_vertex<uint32_t, uint32_t> &v, vid_t pivot, int start_i) {
     assert(is_pivot(pivot));
     int count = 0;
     if (pivot > v.id()) {
         dense_adj &dadj = adjs[pivot - pivot_st];
         int vc = v.num_edges();
          
         /**
           * If the adjacency list sizes are not too different, use
           * 'merge'-type of operation to compute size intersection.
           */
         if (dadj.count < 32 * (vc - start_i)) { // TODO: do real profiling to find best cutoff value
             // Do merge-style of check
             assert(v.edge(start_i)->vertex_id() == pivot);
             int i1 = 0;
             int i2 = start_i+1;
             int nedges = v.num_edges(); 
             
             while (i1 < dadj.count && i2 < nedges) {
                 vid_t dst = v.edge(i2)->vertexid;
                 vid_t a = dadj.adjlist[i1];
                 if (a == dst) {
                     /* Add one to edge between v and the match */
                     v.edge(i2)->set_data(v.edge(i2)->get_data() + 1);
                     count++;
                     i1++; i2++;
                     
                 } else {
                     i1 += a < dst;
                     i2 += a > dst;
                 }  
             }
         } else {
             /**
               * Otherwise, use linear/binary search.
               */
             vid_t lastvid = 0;
             for(int i=start_i+1; i < vc; i++) {
                 vid_t nb = v.edge(i)->vertexid;
                 if (nb > pivot && nb != lastvid) {
                     int match = findadj(dadj.adjlist, dadj.count, nb);
                     count += match;
                     if (match > 0) {
                         /* Add one to edge between v and the match */
                         v.edge(i)->set_data(v.edge(i)->get_data() + 1);
                     }
                 }
                 lastvid = nb;
             }
         }
     }        
     return count;
 }
Example #11
  /**
   *  Vertex update function.
   */
  void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
      if ( vertex.num_outedges() > 0){
        vertex_data & user = latent_factors_inmem[vertex.id()]; 

        memset(&user.weight[0], 0, sizeof(double)*D);
        for(int e=0; e < vertex.num_outedges(); e++) {
          vertex_data & movie = latent_factors_inmem[vertex.edge(e)->vertex_id()]; 
          user.weight += movie.weight;

        }
        // 1 / sqrt(|N(u)|)
        float usrNorm = double(1.0/sqrt(vertex.num_outedges()));
        // (1 / sqrt(|N(u)|)) * sum_j y_j
        user.weight *= usrNorm;

        vec step = zeros(D);

          // main algorithm, see Koren's paper, just below equation (16)
        for(int e=0; e < vertex.num_outedges(); e++) {
          vertex_data & movie = latent_factors_inmem[vertex.edge(e)->vertex_id()]; 
          float observation = vertex.edge(e)->get_data();                
          double estScore;
          rmse_vec[omp_get_thread_num()] += svdpp_predict(user, movie,observation, estScore); 
          // e_ui = r_ui - \hat{r_ui}
          float err = observation - estScore;
          assert(!std::isnan(rmse_vec[omp_get_thread_num()]));
          vec itmFctr = movie.pvec;
          vec usrFctr = user.pvec;

          //q_i = q_i + gamma2 * (e_ui * (p_u + |N(u)|^(-1/2) * sum_j y_j) - gamma7 * q_i)
          for (int j=0; j< D; j++)
            movie.pvec[j] += svdpp.itmFctrStep*(err*(usrFctr[j] +  user.weight[j])             - svdpp.itmFctrReg*itmFctr[j]);
          //p_u = p_u + gamma2    *(e_ui*q_i   -gamma7     *p_u)
          for (int j=0; j< D; j++)
            user.pvec[j] += svdpp.usrFctrStep*(err *itmFctr[j] - svdpp.usrFctrReg*usrFctr[j]);
          step += err*itmFctr;

          //b_i = b_i + gamma1*(e_ui - gamma6 * b_i)
          movie.bias += svdpp.itmBiasStep*(err-svdpp.itmBiasReg* movie.bias);
          //b_u = b_u + gamma1*(e_ui - gamma6 * b_u)
          user.bias += svdpp.usrBiasStep*(err-svdpp.usrBiasReg* user.bias);
        }

        step *= float(svdpp.itmFctr2Step*usrNorm);
        //gamma7 
        double mult = svdpp.itmFctr2Step*svdpp.itmFctr2Reg;
        for(int e=0; e < vertex.num_edges(); e++) {
          vertex_data&  movie = latent_factors_inmem[vertex.edge(e)->vertex_id()];
          //y_j = y_j + gamma2 * (e_ui * q_i / sqrt(|N(u)|) - gamma7 * y_j)
          movie.weight +=  step                    -  mult  * movie.weight;
        }
      }
  }
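
  Collecting the per-rating steps from the comments above in one place (a restatement of the
  update rules as implemented here, in the notation of the comments; e_ui is the prediction
  error, the gamma terms are the step sizes and regularizers):

    q_i \leftarrow q_i + \gamma_2 \bigl( e_{ui}\,(p_u + |N(u)|^{-1/2} \sum_{j \in N(u)} y_j) - \gamma_7\, q_i \bigr)
    p_u \leftarrow p_u + \gamma_2 \bigl( e_{ui}\, q_i - \gamma_7\, p_u \bigr)
    b_i \leftarrow b_i + \gamma_1 \bigl( e_{ui} - \gamma_6\, b_i \bigr), \qquad b_u \leftarrow b_u + \gamma_1 \bigl( e_{ui} - \gamma_6\, b_u \bigr)
    y_j \leftarrow y_j + \gamma_2 \bigl( e_{ui}\, |N(u)|^{-1/2}\, q_i - \gamma_7\, y_j \bigr)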
Example #12
  /**
   *  Vertex update function - computes the least squares step
   */
  void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
    vertex_data & vdata = latent_factors_inmem[vertex.id()];
    bool isuser = vertex.id() < M;
    mat XtX = mat::Zero(D, D); 
    vec Xty = vec::Zero(D);

    bool compute_rmse = (vertex.num_outedges() > 0);
    // Compute XtX and Xty (NOTE: unweighted)
    for(int e=0; e < vertex.num_edges(); e++) {
      const edge_data & edge = vertex.edge(e)->get_data();
      float observation = edge.weight;                
      vertex_data & nbr_latent = latent_factors_inmem[vertex.edge(e)->vertex_id()];
      Xty += nbr_latent.pvec * observation;
      XtX.triangularView<Eigen::Upper>() += nbr_latent.pvec * nbr_latent.pvec.transpose();
      if (compute_rmse) {
        double prediction;
        rmse_vec[omp_get_thread_num()] += pmf_predict(vdata, nbr_latent, observation, prediction, (void*)&edge.avgprd);
        vertex.edge(e)->set_data(edge);
      }
    }

    double regularization = lambda;
    if (regnormal)
      regularization *= vertex.num_edges();
    for(int i=0; i < D; i++) XtX(i,i) += regularization;

    // Solve the least squares problem with eigen using Cholesky decomposition
    mat iAi_;
    bool ret =inv((isuser? A_U : A_V) + alpha *  XtX, iAi_);
    assert(ret);
    vec mui_ =  iAi_*((isuser? (A_U*mu_U) : (A_V*mu_V)) + alpha * Xty); 
    vdata.pvec = mvnrndex(mui_, iAi_, D, 0); 
    assert(vdata.pvec.size() == D);
 }
Example #13
  /**
   *  Vertex update function - computes the least squares step
   */
  void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
    vertex_data & vdata = latent_factors_inmem[vertex.id()];
    vdata.rmse = 0;
    mat XtX = mat::Zero(NLATENT, NLATENT); 
    vec Xty = vec::Zero(NLATENT);

    bool compute_rmse = is_user(vertex.id()); 
    // Compute XtX and Xty (NOTE: unweighted)
    for(int e=0; e < vertex.num_edges(); e++) {
      float observation = vertex.edge(e)->get_data().weight;                
      uint time = vertex.edge(e)->get_data().time;
      vertex_data & nbr_latent = latent_factors_inmem[vertex.edge(e)->vertex_id()];
      vertex_data & time_node = latent_factors_inmem[time];
      assert(time != vertex.id() && time != vertex.edge(e)->vertex_id());
      Map<vec> X(nbr_latent.pvec, NLATENT);
      Map<vec> Y(time_node.pvec, NLATENT);
      vec XY = X.cwiseProduct(Y);
      Xty += XY * observation;
      XtX.triangularView<Eigen::Upper>() += XY * XY.transpose();
      if (compute_rmse) {
        double prediction;
        vdata.rmse += als_tensor_predict(vdata, nbr_latent, time_node, observation, prediction);
      }
    }

    for(int i=0; i < NLATENT; i++) XtX(i,i) += (lambda); // * vertex.num_edges();

    // Solve the least squares problem with eigen using Cholesky decomposition
    Map<vec> vdata_vec(vdata.pvec, NLATENT);
    vdata_vec = XtX.selfadjointView<Eigen::Upper>().ldlt().solve(Xty);
  }
  /** Scores all documents for the query. The first step in update(). */
  void score_documents(graphchi_vertex<TypeVertex, FeatureEdge> &query,
                       graphchi_context &ginfo) {
    // XXX
//    std::map<double, FeatureEdge> scores;
    for (int doc = 0; doc < query.num_outedges(); doc++) {
      FeatureEdge* fe = query.outedge(doc)->get_vector();
      fe->header().score = model->score(fe->get_data());
//      query.outedge(doc)->set_vector(fe);

//      scores[fe.score] = fe;
    }
//    for (auto rit = scores.crbegin(); rit != scores.crend(); ++rit) {
//      std::cout << "Score " << query.id()
//                << ": " << rit->second.str() << std::endl;
//    }
  }
Example #15
  /** 
   * Calculate the distance between two items.
   * Let a be all the users who rated item 1
   * and b be all the users who rated item 2.
   *
   * 3) Using Pearson correlation
   *      Dist_ab = (a - mean)*(b- mean)' / (std(a)*std(b))
   *
   * 4) Using cosine similarity:
   *      Dist_ab = (a*b) / (sqrt(sum_sqr(a)) * sqrt(sum_sqr(b)))
   *
   * 5) Using Chebyshev distance:
   *      Dist_ab = max(abs(a-b))
   *
   * 6) Using Manhattan distance:
   *      Dist_ab = sum(abs(a-b))
   *
   * 7) Using tanimoto:
   *      Dist_ab = 1.0 - [(a*b) / (sum_sqr(a) + sum_sqr(b) - a*b)]
   *
   * 8) Using log likelihood similarity
   *      Dist_ab = 1.0 - 1.0/(1.0 + loglikelihood)
   *
   * 9) Using Jaccard:
   *      Dist_ab = intersect(a,b) / (size(a) + size(b) - intersect(a,b)) 
   */
  double calc_distance(graphchi_vertex<VertexDataType, EdgeDataType> &v, vid_t pivot, int distance_metric) {
    //assert(is_pivot(pivot));
    //assert(is_item(pivot) && is_item(v.id()));
    dense_adj &pivot_edges = adjs[pivot - pivot_st];
    int num_edges = v.num_edges();

    dense_adj item_edges; 
    for(int i=0; i < num_edges; i++){ 
      set_new(item_edges.edges, v.edge(i)->vertexid, v.edge(i)->get_data());
    }

    if (distance_metric == JACCARD_WEIGHT){
      return calc_jaccard_weight_distance(pivot_edges.edges, item_edges.edges, get_val( pivot_edges.edges, v.id()), 0);
    }
    return NAN;  
  }
Example #16
  /**
   *  Vertex update function - blends the vertex factor with a co-occurrence-weighted
   *  average of its neighbors' factors.
   */
  void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
    vertex_data & vdata = latent_factors_inmem[vertex.id()];
    if (vertex.num_edges() == 0 || vdata.seed) //no edges, nothing to do here
      return;
    
    vec ret = zeros(D);
    double normalization = 0;
    for(int e=0; e < vertex.num_edges(); e++) {
      edge_data edge = vertex.edge(e)->get_data();                
      vertex_data & nbr_latent = latent_factors_inmem[vertex.edge(e)->vertex_id()];
      ret += edge.cooccurence_count * nbr_latent.pvec;
      normalization += edge.cooccurence_count;
    }

    ret /= normalization;
    vdata.pvec = alpha * vdata.pvec + (1-alpha)*ret;
  }
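
  Written out, the step above is a damped, co-occurrence-weighted average of the neighbors'
  factors (a restatement of the code, where c_e is the edge's cooccurence_count and p_{n(e)}
  the neighbor's pvec):

    p_v \leftarrow \alpha\, p_v + (1 - \alpha)\, \frac{\sum_e c_e\, p_{n(e)}}{\sum_e c_e}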
Example #17
  /**
   *  Vertex update function - computes the least squares step
   */
  void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
    vertex_data & vdata = latent_factors_inmem[vertex.id()];
    mat XtX = mat::Zero(D, D); 
    vec Xty = vec::Zero(D);

    bool compute_rmse = (vertex.num_outedges() > 0);
    // Compute XtX and Xty (NOTE: unweighted)
    for(int e=0; e < vertex.num_edges(); e++) {
      float observation = vertex.edge(e)->get_data();                
      vertex_data & nbr_latent = latent_factors_inmem[vertex.edge(e)->vertex_id()];
      Xty += nbr_latent.pvec * observation;
      XtX += nbr_latent.pvec * nbr_latent.pvec.transpose();
      if (compute_rmse) {
        double prediction;
        rmse_vec[omp_get_thread_num()] += sparse_als_predict(vdata, nbr_latent, observation, prediction);
      }
    }

    double regularization = lambda;
    if (regnormal)
      regularization *= vertex.num_edges();
    for(int i=0; i < D; i++) XtX(i,i) += regularization;


    bool isuser = vertex.id() < (uint)M;
    if (algorithm == SPARSE_BOTH_FACTORS || (algorithm == SPARSE_USR_FACTOR && isuser) || 
        (algorithm == SPARSE_ITM_FACTOR && !isuser)){ 
      double sparsity_level = 1.0;
      if (isuser)
        sparsity_level -= user_sparsity;
      else sparsity_level -= movie_sparsity;
      vdata.pvec = CoSaMP(XtX, Xty, (int)ceil(sparsity_level*(double)D), 10, 1e-4, D); 
    }
    else vdata.pvec = XtX.selfadjointView<Eigen::Upper>().ldlt().solve(Xty);
  }
Example #18
	void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
		
		assert(vertex.num_inedges() * vertex.num_outedges() <= product);

		for(int i=0; i<vertex.num_outedges(); i++){
			bidirectional_label edgedata = vertex.outedge(i)->get_data();
			if(edgedata.is_equal()){		
				if(root == edgedata.my_label(vertex.id(), vertex.outedge(i)->vertexid)){
					lock.lock();
						fprintf(fpout, "%u\t%u\n", vertex.id(), vertex.outedge(i)->vertexid);
					lock.unlock();
					continue;
				}
			}
			lock.lock();
			fprintf(fpout1, "%u\t%u\n", vertex.id(), vertex.outedge(i)->vertexid);
			lock.unlock();
		}
	}
Example #19
	void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
		
	//	assert(vertex.num_inedges() * vertex.num_outedges() <= product);
		if(vertex.num_edges() == 0)
			return;
		if(gcontext.iteration == 0){
			VertexDataType vertexdata = vertex.get_data();
			if(!vertexdata.confirmed){
				lock.lock();
				left++;
				lock.unlock();
				return;
			}

			if(vertexdata.confirmed && vertexdata.reconfirmed){
				lock.lock();
				middle++;	
				lock.unlock();
			}else{
				lock.lock();
				right++;
				lock.unlock();
			}
		}	
		/*	
		for(int i=0; i<vertex.num_outedges(); i++){
			bidirectional_label edgedata = vertex.outedge(i)->get_data();
			if(edgedata.is_equal()){		
				if(root == edgedata.my_label(vertex.id(), vertex.outedge(i)->vertexid)){
					lock.lock();
						fprintf(fpout, "%u\t%u\n", vertex.id(), vertex.outedge(i)->vertexid);
					lock.unlock();
					continue;
				}
			}
			lock.lock();
			fprintf(fpout1, "%u\t%u\n", vertex.id(), vertex.outedge(i)->vertexid);
			lock.unlock();
		}
		*/
	}
    /**
      * Pagerank update function.
      */
    void update(graphchi_vertex<VertexDataType, EdgeDataType> &v, graphchi_context &ginfo) {
        float sum=0;
        float prv = 0.0;
        float pagerankcont = 0.0;

        if (ginfo.iteration == 0) {
            /* On first iteration, initialize vertex and out-edges. 
               The initialization is important,
               because on every run, GraphChi will modify the data in the edges on disk. 
             */
            /* For the weighted version */
            update_edge_data(v, 1.0, true);
            v.set_data(RANDOMRESETPROB); 
            //v.set_data(1.0); 
        } else {
            /* Weighted version: sum the weighted pagerank contributions from the in-edges */
            for(int i=0; i < v.num_inedges(); i++) {
                chivector<float> * evector = v.inedge(i)->get_vector();
                assert(evector->size() >= 2);
                sum += evector->get(1);
                //std::cout <<  v.id() << " with data: " << evector->get(1) << " with weight " << evector->get(0) << std::endl;
                //std::cout <<  v.id() << " edge endpoint: " << v.inedge(i)->vertex_id() << std::endl;
                //evector->clear();
            }

            /* Compute my pagerank */
            prv = RANDOMRESETPROB + (1 - RANDOMRESETPROB) * sum;
	    //std::cout << "sum" << sum << "pagerank: " << prv << std::endl;

            update_edge_data(v, prv, false);
            /* Keep track of the progression of the computation.
               GraphChi engine writes a file filename.deltalog. */
            double delta = std::abs(prv - v.get_data());
            //std::cout << "pagerank: " << prv << "v.data" << v.get_data() << "delta: " << delta << std::endl;
            ginfo.log_change(delta);
            
            /* Set my new pagerank as the vertex value */
            v.set_data(prv);
        }
    }
Example #21
  /**
   *  Vertex update function - computes the least squares step
   */
  void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
    vertex_data & vdata = latent_factors_inmem[vertex.id()];
    vdata.rmse = 0;
    mat XtX = mat::Zero(NLATENT, NLATENT); 
    vec Xty = vec::Zero(NLATENT);

    bool compute_rmse = (vertex.num_outedges() > 0);
    // Compute XtX and Xty (NOTE: unweighted)
    for(int e=0; e < vertex.num_edges(); e++) {
      float observation = vertex.edge(e)->get_data();                
      vertex_data & nbr_latent = latent_factors_inmem[vertex.edge(e)->vertex_id()];
      Map<vec> X(nbr_latent.pvec, NLATENT);
      Xty += X * observation;
      XtX += X * X.transpose();
      if (compute_rmse) {
        double prediction;
        vdata.rmse += sparse_als_predict(vdata, nbr_latent, observation, prediction);
      }
    }

    for(int i=0; i < NLATENT; i++) XtX(i,i) += (lambda); // * vertex.num_edges();

    bool isuser = vertex.id() < (uint)M;
    Map<vec> vdata_vec(vdata.pvec, NLATENT);
    if (algorithm == SPARSE_BOTH_FACTORS || (algorithm == SPARSE_USR_FACTOR && isuser) || 
        (algorithm == SPARSE_ITM_FACTOR && !isuser)){ 
      double sparsity_level = 1.0;
      if (isuser)
        sparsity_level -= user_sparsity;
      else sparsity_level -= movie_sparsity;
      vdata_vec = CoSaMP(XtX, Xty, ceil(sparsity_level*(double)NLATENT), 10, 1e-4, NLATENT); 
    }
    else vdata_vec = XtX.selfadjointView<Eigen::Upper>().ldlt().solve(Xty);
  }
    /**
     *  Vertex update function.
     *  On the first iteration, each vertex chooses a label = the vertex id.
     *  On subsequent iterations, each vertex chooses the minimum of its neighbors'
     *  labels (and its own).
     */
    void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext)
    {
        /* This program requires selective scheduling. */
        assert(gcontext.scheduler != NULL);

        if(gcontext.iteration == 0)
        {
            set_data(vertex, vertex.id());
            /* Schedule neighbor for update */
            gcontext.scheduler->add_task(vertex.id());
            return;
        }
        else
        {
            /* On subsequent iterations, find the minimum label of my neighbors */
            vid_t curmin = vertex_values[vertex.id()];
            for(int i=0; i < vertex.num_edges(); i++)
            {
                vid_t nblabel = neighbor_value(vertex.edge(i));
                curmin = std::min(nblabel, curmin);
            }

            /* If my label changes, schedule neighbors */
            if ( curmin < vertex.get_data() )
            {
                for(int i=0; i < vertex.num_edges(); i++)
                {
                    if (curmin < neighbor_value(vertex.edge(i)))
                    {
                        /* Schedule neighbor for update */
                        gcontext.scheduler->add_task(vertex.edge(i)->vertex_id());
                    }
                }
                set_data(vertex, curmin);
            }
        }
    }
Example #23
    /**
     * Grab pivot's adjacency list into memory.
     */
    int load_edges_into_memory(graphchi_vertex<uint32_t, edge_data> &v) {
      assert(is_pivot(v.id()));
      assert(is_user(v.id()));

      int num_edges = v.num_edges();

      dense_adj dadj;
      for(int i=0; i<num_edges; i++) 
        set_new( dadj.edges, v.edge(i)->vertex_id(), v.edge(i)->get_data().up_weight);
      //dadj.ratings = zeros(N);
      dadj.vid = v.id();
      adjs[v.id() - pivot_st] = dadj;
      assert(v.id() - pivot_st < adjs.size());
      __sync_add_and_fetch(&grabbed_edges, num_edges /*edges_to_larger_id*/);
      return num_edges;
    }
Example #24
  /**
   * Grab pivot's adjacency list into memory.
   */
  int load_edges_into_memory(graphchi_vertex<VertexDataType, EdgeDataType> &v) {
    //assert(is_pivot(v.id()));
    //assert(is_item(v.id()));

    int num_edges = v.num_edges();
    //not enough users rated this item, so we don't need to compare to it
    if (num_edges < min_allowed_intersection){
      if (debug)
        logstream(LOG_DEBUG)<<"Skipping since num edges: " << num_edges << std::endl;
      return 0;
    }


    // Count how many neighbors have larger id than v
    dense_adj dadj;
    for(int i=0; i<num_edges; i++) 
      set_new( dadj.edges, v.edge(i)->vertex_id(), v.edge(i)->get_data());

    //std::sort(&dadj.adjlist[0], &dadj.adjlist[0] + num_edges);
    adjs[v.id() - pivot_st] = dadj;
    assert(v.id() - pivot_st < adjs.size());
    __sync_add_and_fetch(&grabbed_edges, num_edges /*edges_to_larger_id*/);
    return num_edges;
  }
Example #25
  /** The actual LambdaRank implementation. */
  virtual void compute_gradients(
      graphchi_vertex<TypeVertex, FeatureEdge> &query, Gradient* umodel) {
    std::vector<double> lambdas(query.num_outedges());
    std::vector<double> s_is(query.num_outedges());

    /* First, we compute all the outputs... */
    for (int i = 0; i < query.num_outedges(); i++) {
      s_is[i] = get_score(query.outedge(i));
//      std::cout << "s[" << i << "] == " << s_is[i] << std::endl;
    }
    /* ...and the retrieval measure scores. */
    opt.compute(query);


    /* Now, we compute the errors (lambdas). */
    for (int i = 0; i < query.num_outedges() - 1; i++) {
      int rel_i = get_relevance(query.outedge(i));
      for (int j = i + 1; j < query.num_outedges(); j++) {
        int rel_j = get_relevance(query.outedge(j));
        if (rel_i != rel_j) {
          double S_ij = rel_i > rel_j ? 1 : -1;
          double lambda_ij = dC_per_ds_i(S_ij, s_is[i], s_is[j]) *
                             fabs(opt.delta(query, i, j));
          /* lambda_ij = -lambda_ji */
          lambdas[i] += lambda_ij;
          lambdas[j] -= lambda_ij;
        }
      }
    }

    /* Finally, the model update. */
    for (int i = 0; i < query.num_outedges(); i++) {
      // -lambdas[i], as C is a utility function in this case
      umodel->update(query.outedge(i)->get_vector()->get_data(), s_is[i], lambdas[i]);
    }
  }
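
  In formula form, the pairwise loop above accumulates, for every document pair (i, j) of the
  query with different relevance labels (a restatement of the code; \Delta M_{ij} is the
  retrieval-measure delta returned by opt.delta, and S_{ij} = +1 if rel_i > rel_j, else -1):

    \lambda_{ij} = \frac{\partial C}{\partial s_i}(S_{ij}, s_i, s_j)\,\bigl|\Delta M_{ij}\bigr|, \qquad \lambda_i \mathrel{+}= \lambda_{ij}, \quad \lambda_j \mathrel{-}= \lambda_{ij}

  The accumulated per-document lambdas are then passed to umodel->update in the final loop,
  with the sign convention noted in the comment (C is a utility function here).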
Example #26
	void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
		if(vertex.num_edges() == 0)
			return ;
		VertexDataType vertexdata = vertex.get_data();
		if(vertexdata.confirmed && vertexdata.reconfirmed)
			return ;	
		//assert(vertex.num_inedges() * vertex.num_outedges() <= product);
		if (gcontext.iteration == 0){
			if(vertexdata.confirmed){
				vertexdata.color =	getNewIdRight(); 
			}else{
				vertexdata.color = getNewIdLeft();
			}	
			vertex.set_data(vertexdata);
		}else{ 
			/*
			for(int i=0; i<vertex.num_outedges(); i++){
				bidirectional_label edgedata = vertex.outedge(i)->get_data();
				if(edgedata.is_equal()){		
					if(root == edgedata.my_label(vertex.id(), vertex.outedge(i)->vertexid)){
						lock.lock();
						fprintf(fpout, "%u\t%u\n", vertex.id(), vertex.outedge(i)->vertexid);
						lock.unlock();
						continue;
					}
				}
				lock.lock();
				fprintf(fpout1, "%u\t%u\n", vertex.id(), vertex.outedge(i)->vertexid);
				lock.unlock();
			}
			*/
			lock.lock();
			fprintf(vmap, "%u\t%u\n", vertex.id(), vertexdata.color);
			lock.unlock();
		}
	}
Example #27
    /**
     *  Vertex update function.
     */
    void update(graphchi_vertex<VertexDataType, edge_data> &v, graphchi_context &gcontext) {
        if (debug)
            printf("Entered iteration %d with %d\n", gcontext.iteration, is_item(v.id()) ? (v.id() - M + 1): v.id());

        /* Even iteration numbers:
         * 1) load a subset of users into memory (pivots)
         * 2) Find which subset of items is connected to the users
         */
        if (gcontext.iteration % 2 == 0) {
            if (adjcontainer->is_pivot(v.id()) && is_user(v.id())) {
                adjcontainer->load_edges_into_memory(v);
                if (debug)
                    printf("Loading pivot %d intro memory\n", v.id());
            }
        }
        /* Odd iteration numbers:
         * 1) For any item connected to a pivot item,
         *    compute the intersection
         */
        else {
            assert(is_item(v.id()));

            for (int i=0; i< v.num_edges(); i++) {
                if (!adjcontainer->is_pivot(v.edge(i)->vertex_id()))
                    continue;
                if (debug)
                    printf("comparing user pivot %d to item %d\n", v.edge(i)->vertex_id()+1 , v.id() - M + 1);

                adjcontainer->compute_ratings(v, v.edge(i)->vertex_id(), v.edge(i)->get_data().up_weight);
                item_pairs_compared++;

                if (item_pairs_compared % 1000000 == 0)
                    Rcpp::Rcout<< std::setw(10) << mytimer.current_time() << ")  " << std::setw(10) << item_pairs_compared << " pairs compared " << std::endl;
            }
        }//end of iteration % 2 == 1
    }//end of update function
    /**
      * Update the weighted edge chivector.
      * We first read each out-edge's weight from the first element and sum them, then set the
      * second element of each edge to its weighted share of the given quota.
      */
    void update_edge_data(graphchi_vertex<VertexDataType, EdgeDataType> &v, float quota, bool first){
        float sum = 0.0;
        //if(first)
        for(int i=0; i < v.num_outedges(); i++) {
            graphchi_edge<EdgeDataType> * edge = v.outedge(i);
            if (edge != NULL) {
                chivector<float> * evector = edge->get_vector();
                //std::cout << evector->size() << std::endl;
                /*if (first)
                    assert(evector->size() == 1);
                else
                    assert(evector->size() == 2);
                assert(evector->size() == 2);*/
                std::cout <<  v.id() << " with data: " << evector->get(0) << std::endl;
                sum += evector->get(0);
                /*if (first){
                    evector->add(sum);
                    assert(evector->size() == 2);
                }*/
            }
        }

        for(int i=0; i < v.num_outedges(); i++) {
            graphchi_edge<EdgeDataType> * edge = v.outedge(i);
            if (edge != NULL) {
                chivector<float> * evector = edge->get_vector();
                //assert(evector->size() == 2);
                float val = quota * evector->get(0) / sum;
                //evector->set(1, val);
                if(first && (evector->size() == 1))
                    evector->add(val);
                evector->set(1, val);
                //std::cout <<  v.id() << " with data: " << evector->get(0) << std::endl;
            }
        }
    }
 /**
  * This method runs only for the query nodes. Its actual function is divided
  * into several methods, as not all is needed in each phase.
  */
 void update(graphchi_vertex<TypeVertex, FeatureEdge> &v,
             graphchi_context &ginfo) {
   // TODO Use a scheduler instead of this?
   if (v.get_data().type == QUERY) {  // Only queries have outedges (TODO: ???)
     /* We count the number of queries. */
     if (ginfo.iteration == 0) {
       num_queries++;
     }
     score_documents(v, ginfo);
     if (phase == TRAINING) {
       compute_gradients(v, parallel_models[omp_get_thread_num()]);
     }
     if (phase == TRAINING || phase == VALIDATION || phase == TESTING) {
       evaluate_model(v, ginfo);
     }
   }
 }
Example #30
  /**
   *  Vertex update function.
   */
  void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {

    if (vertex.id() < (uint)mi.start || vertex.id() >= (uint)mi.end)
      return;

    vertex_data& user = latent_factors_inmem[vertex.id()];
    bool rows = vertex.id() < (uint)info.get_start_node(false);
    if (info.is_square()) 
      rows = mi.A_transpose;
    (void) rows; // unused
    assert(mi.r_offset >=0);
    //store previous value for convergence detection
    if (mi.prev_offset >= 0)
      user.pvec[mi.prev_offset ] = user.pvec[mi.r_offset];

    double val = 0;
    assert(mi.x_offset >=0 || mi.y_offset>=0);

    /*** COMPUTE r = c*A*x  ********/
    if (mi.A_offset  && mi.x_offset >= 0){
      for(int e=0; e < vertex.num_edges(); e++) {
        const edge_data & edge = vertex.edge(e)->get_data();
        const vertex_data  & movie = latent_factors_inmem[vertex.edge(e)->vertex_id()];
        val += (edge.weight * movie.pvec[mi.x_offset]);
      }

      if  (info.is_square() && mi.use_diag)// add the diagonal term
        val += (/*mi.c**/ (user.A_ii+ regularization) * user.pvec[mi.x_offset]);

      val *= mi.c;
    }
    /***** COMPUTE r = c*I*x  *****/
    else if (!mi.A_offset && mi.x_offset >= 0){
      val = mi.c*user.pvec[mi.x_offset];
    }

    /**** COMPUTE r+= d*y (optional) ***/
    if (mi.y_offset>= 0){
      val += mi.d*user.pvec[mi.y_offset]; 
    }

    /***** compute r = (... ) / div */
    if (mi.div_offset >= 0){
      val /= user.pvec[mi.div_offset];
    }
    assert(mi.r_offset>=0 && mi.r_offset < user.pvec.size());
    user.pvec[mi.r_offset] = val;
  } //end update
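
  Putting the branches above together, each vertex writes one coordinate of a generalized
  linear-algebra kernel of the form (a restatement of the code; each bracketed term applies
  only when the corresponding offset/flag in mi is set, and when mi.A_offset is false the
  matrix term reduces to x_v):

    r_v = \frac{ c \bigl( \sum_e A_{v,n(e)}\, x_{n(e)} \; [\, + (A_{vv} + \lambda)\, x_v \,] \bigr) \; [\, + d\, y_v \,] }{ [\, \mathrm{div}_v \,] }

  where x, y, r and div are the pvec columns selected by mi.x_offset, mi.y_offset, mi.r_offset
  and mi.div_offset, and c, d are mi.c and mi.d.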