Esempio n. 1
0
float rbm_predict(const vertex_data & usr,
                  const vertex_data & mov,
                  const float rating,
                  double & prediction,
                  void * extra) {
    return rbm_predict(rbm_user((vertex_data&)usr), rbm_movie((vertex_data&)mov), rating, prediction, NULL);
}
Esempio n. 2
0
    /**
     *  Vertex update function.
     *
     *  What is done depends on gcontext.iteration:
     *
     *  - Iteration 0 (user vertices with out-edges): resets the user's pvec to
     *    3*D zeros and, for every rating, increments the bi counter of the
     *    rated movie at bin r = (int)(rating / rbm_scaling).
     *  - Iteration 1 (vertices with in-edges): initializes the weights w with
     *    setRand2() and replaces every bi counter by
     *    log(1e-9 + count / num_inedges).
     *  - Later iterations (user vertices with out-edges): one training step
     *    for this user:
     *      1. h  = sigmoid(sum of the w rows of the observed rating bins);
     *         h0 is a 0/1 sample drawn with probability h.
     *      2. predict1() produces a rating per edge; its bin is kept in v1.
     *      3. h1 = sigmoid(sum of the w rows of the bins in v1), then
     *         binarized in place by sampling.
     *      4. For each edge, the squared scaled error of rbm_predict() is
     *         added to this thread's slot of rmse_vec, and the w rows of the
     *         observed and predicted bins are updated using rbm_alpha and
     *         rbm_beta.
     *
     *  NOTE(review): rbm_movie / rbm_user objects are created as locals but
     *  written through; presumably they alias the storage of the underlying
     *  vertex_data - confirm against their definitions.
     *
     *  @param vertex    vertex being updated, together with its edges
     *  @param gcontext  execution context; only gcontext.iteration is read
     */
    void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {

        if (gcontext.iteration == 0) {
            // Pass 0: count, per movie, how many ratings fall into each bin.
            if (is_user(vertex.id()) && vertex.num_outedges() > 0) {
                vertex_data& user = latent_factors_inmem[vertex.id()];
                user.pvec = zeros(D*3);
                for(int e=0; e < vertex.num_outedges(); e++) {
                    rbm_movie mov = latent_factors_inmem[vertex.edge(e)->vertex_id()];
                    float observation = vertex.edge(e)->get_data();
                    int r = static_cast<int>(observation/rbm_scaling);
                    // A negative rating would index before the start of bi.
                    assert(r >= 0 && r < rbm_bins);
                    mov.bi[r]++;
                }
            }
            return;
        } else if (gcontext.iteration == 1) {
            // Pass 1: initialize the weights and turn the bin counts into
            // log-frequencies.
            if (vertex.num_inedges() > 0) {
                rbm_movie mov = latent_factors_inmem[vertex.id()];
                setRand2(mov.w, D*rbm_bins, 0.001);
                for(int r = 0; r < rbm_bins; ++r) {
                    mov.bi[r] /= (double)vertex.num_inedges();
                    // The 1E-9 offset keeps log() finite for empty bins.
                    mov.bi[r] = log(1E-9 + mov.bi[r]);

                    if (mov.bi[r] > 1000) {
                        // Report before asserting: with assertions enabled the
                        // assert aborts, so the message has to be written first.
                        Rcpp::Rcerr<<"Numerical overflow" <<std::endl;
                        assert(false);
                    }
                }
            }

            return; //done with initialization
        }

        //go over all user nodes
        if (is_user(vertex.id()) && vertex.num_outedges()) {
            vertex_data & user = latent_factors_inmem[vertex.id()];
            user.pvec = zeros(3*D);
            rbm_user usr(user);

            // v1[e] holds the rating bin predicted for out-edge e.
            vec v1 = zeros(vertex.num_outedges());

            //go over all ratings
            for(int e=0; e < vertex.num_outedges(); e++) {
                float observation = vertex.edge(e)->get_data();
                rbm_movie mov = latent_factors_inmem[vertex.edge(e)->vertex_id()];
                int r = static_cast<int>(observation / rbm_scaling);
                assert(r >= 0 && r < rbm_bins);
                for(int k=0; k < D; k++) {
                    usr.h[k] += mov.w[D*r + k];
                    assert(!std::isnan(usr.h[k]));
                }
            }

            // Squash the accumulated activations and draw the 0/1 sample h0.
            for(int k=0; k < D; k++) {
                usr.h[k] = sigmoid(usr.h[k]);
                if (drand48() < usr.h[k])
                    usr.h0[k] = 1;
                else usr.h0[k] = 0;
            }

            // Predict a rating for every edge and remember its bin.
            // NOTE(review): the predicted bin is not range-checked before it
            // is used as an index into w below - confirm predict1() keeps it
            // inside [0, rbm_bins).
            double sampled_prediction = 0;
            for(int e=0; e < vertex.num_outedges(); e++) {
                rbm_movie mov = latent_factors_inmem[vertex.edge(e)->vertex_id()];
                float observation = vertex.edge(e)->get_data();
                predict1(usr, mov, observation, sampled_prediction);
                v1[e] = static_cast<int>(sampled_prediction / rbm_scaling);
            }

            // Accumulate the activations h1 driven by the predicted bins.
            for(int e=0; e < vertex.num_outedges(); e++) {
                rbm_movie mov = latent_factors_inmem[vertex.edge(e)->vertex_id()];
                int r = static_cast<int>(v1[e]);
                for (int k=0; k< D; k++) {
                    usr.h1[k] += mov.w[r*D+k];
                }
            }

            // Squash h1 and binarize it in place by sampling.
            for (int k=0; k < D; k++) {
                usr.h1[k] = sigmoid(usr.h1[k]);
                if (drand48() < usr.h1[k])
                    usr.h1[k] = 1;
                else usr.h1[k] = 0;
            }

            // Accumulate the training error and update the weights.
            for(int e=0; e < vertex.num_outedges(); e++) {
                rbm_movie mov = latent_factors_inmem[vertex.edge(e)->vertex_id()];
                float observation = vertex.edge(e)->get_data();
                double prediction = 0;
                rbm_predict(user, mov, observation, prediction, NULL);
                double pui = prediction / rbm_scaling;
                double rui = observation / rbm_scaling;
                rmse_vec[omp_get_thread_num()] += (pui - rui) * (pui - rui);
                //nn += 1.0;
                int vi0 = static_cast<int>(rui);    // observed bin
                int vi1 = static_cast<int>(v1[e]);  // predicted bin
                for (int k = 0; k < D; k++) {
                    // Order matters: when vi0 == vi1 the second update reads
                    // the weight just written by the first.
                    mov.w[D*vi0+k] += rbm_alpha * (usr.h0[k] - rbm_beta * mov.w[vi0*D+k]);
                    assert(!std::isnan(mov.w[D*vi0+k]));
                    mov.w[D*vi1+k] -= rbm_alpha * (usr.h1[k] + rbm_beta * mov.w[vi1*D+k]);
                    assert(!std::isnan(mov.w[D*vi1+k]));
                }
            }
        }
    }
Esempio n. 3
0
  /**
   *  Vertex update function - computes item recommendations for one user.
   *
   *  Only vertices with start_user <= id < min(M, end_user) are processed.
   *  Candidate items (vertex ids M .. M+N-1) are scored with the predictor
   *  selected by `algo` (SVDPP, BIASSGD or RBM):
   *   - knn_sample_percent == 1.0: every item the user has no edge to is
   *     scored and 1e-10 is added to its score; items the user has an edge to
   *     keep index -1 and score 0.
   *   - otherwise: howmany = (int)(N * knn_sample_percent) items are drawn
   *     with ::randi(M, M+N-1) and scored.
   *  The candidates are passed to reverse_sort_index2() and its output (at
   *  most num_ratings entries, as asserted) is stored in vdata.ids and
   *  vdata.ratings.
   *
   *  NOTE(review): the sampled branch does not consult `curratings`, so it
   *  may score items the user already has an edge to, and presumably may draw
   *  the same item more than once - confirm this is intended.
   *
   *  @param vertex    vertex being updated, together with its edges
   *  @param gcontext  execution context (not read by this function)
   */
  void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {

    //compute only for user nodes
    if (vertex.id() >= std::min(M,(uint)end_user) || vertex.id() < (uint)start_user)
      return;

    vertex_data & vdata = latent_factors_inmem[vertex.id()];
    int howmany = (int)(N*knn_sample_percent);
    assert(howmany > 0 );
    if (vertex.num_outedges() == 0){
       // The counter is shared between update calls, hence the lock.
       mymutex.lock();
       users_without_ratings++;
       mymutex.unlock();
    }

    vec distances = zeros(howmany);
    ivec indices = ivec::Zero(howmany);
    // -1 marks a candidate slot that was never filled.
    for (int i=0; i< howmany; i++){
      indices[i]= -1;
    }

    // curratings[j] is true when this user has an edge to item M+j.
    std::vector<bool> curratings;
    curratings.resize(N);
    for(int e=0; e < vertex.num_edges(); e++) {
      //no need to calculate this rating since it is given in the training data reference
      // Compare before subtracting: the ids are unsigned, so "id - M >= 0"
      // would always hold.
      assert(vertex.edge(e)->vertex_id() >= M && vertex.edge(e)->vertex_id() - M < N);
      curratings[vertex.edge(e)->vertex_id() - M] = true;
    }

    if (knn_sample_percent == 1.0){
      // Exhaustive mode: score every item the user has no edge to.
      for (uint i=M; i< M+N; i++){
        if (curratings[i-M])
          continue;
        vertex_data & other = latent_factors_inmem[i];
        double dist;
        if (algo == SVDPP)
          svdpp_predict(vdata, other, 0, dist);
        else if (algo == BIASSGD)
          biassgd_predict(vdata, other, 0, dist);
        else if (algo == RBM)
          rbm_predict(vdata, other, 0, dist);
        else assert(false);
        indices[i-M] = i-M;
        // NOTE(review): the 1e-10 offset presumably keeps scored items apart
        // from the skipped ones, whose distance stays 0 - confirm.
        distances[i-M] = dist + 1e-10;
      }
    }
    else for (int i=0; i<howmany; i++){
      // Sampled mode: score `howmany` randomly drawn items.
      int random_other = ::randi(M, M+N-1);
      vertex_data & other = latent_factors_inmem[random_other];
      double dist;
      if (algo == SVDPP)
        svdpp_predict(vdata, other, 0, dist);
      else if (algo == BIASSGD)
        biassgd_predict(vdata, other, 0, dist);
      else if (algo == RBM)
        rbm_predict(vdata, other, 0, dist);
      else assert(false);

      indices[i] = random_other-M;
      distances[i] = dist;
    }

    vec out_dist(num_ratings);
    ivec indices_sorted = reverse_sort_index2(distances, indices, out_dist, num_ratings);
    assert(indices_sorted.size() <= num_ratings);
    assert(out_dist.size() <= num_ratings);
    vdata.ids = indices_sorted;
    vdata.ratings = out_dist;
    // Only print the best candidate when there is one to read.
    if (debug && vdata.ids.size() > 0 && vdata.ratings.size() > 0)
      printf("Closest is: %d with distance %g\n", (int)vdata.ids[0], vdata.ratings[0]);

    // Progress report every 1000 users; the id is cast to match %d.
    if (vertex.id() % 1000 == 0)
      printf("Computing recommendations for user %d at time: %g\n", static_cast<int>(vertex.id()+1), mytimer.current_time());
  }