/**
 * Serializes the recommendation lists of a user vertex as a single text line:
 *
 *   "<vertex id> <rated>,<rated>,... <pred>,<pred>,...\n"
 *
 * Each list element is formatted by pair2str(). An empty string is returned
 * for non-user vertices, for users with fewer than 10 top-rated entries, and
 * for users with no predictions.
 */
std::string save_vertex(const graph_type::vertex_type& v) {
  typedef std::vector<std::pair<double, graphlab::vertex_id_type> > scored_list;

  if (!is_user(v)) {
    return "";
  }

  const scored_list& rated = v.data().top_rated;
  const scored_list& predicted = v.data().top_pred;
  if (rated.size() < 10 || predicted.empty()) {
    return "";
  }

  std::stringstream out;
  out << v.id() << " ";

  // Top rated items, comma separated.
  for (size_t idx = 0; idx < rated.size(); ++idx) {
    if (idx != 0) {
      out << ",";
    }
    out << pair2str(rated[idx]);
  }

  out << " ";

  // Top predicted items, comma separated.
  for (size_t idx = 0; idx < predicted.size(); ++idx) {
    if (idx != 0) {
      out << ",";
    }
    out << pair2str(predicted[idx]);
  }

  out << "\n";
  return out.str();
}
Beispiel #2
0
/*
 * Handles one line of input typed while the user is in the mail editor.
 *
 * The first word of the line selects the action:
 *   (empty)   -> func_rnew_mail
 *   "?" / "h" -> help topic "MAIL_DATA-INDEX"
 *   "l"       -> func_list_mail
 *   "r"       -> func_reply_mail (rest of line passed through)
 *   "d"       -> func_delete_mail (rest of line passed through)
 *   "q"       -> func_quit_mail
 *   "c"       -> start composing a mail to the user named by the rest of the line
 *   <number>  -> func_read_mail
 * Anything else produces an "unknown command" message.
 */
void edit_mail_mode(USER_DATA *usr, char *argument) {
	char cmd[INPUT];

	/* Skip leading whitespace. */
	for (; isspace(*argument); argument++)
		;

	smash_tilde(argument);
	usr->timer = 0;

	/* Split off the command word; argument now points at the remainder. */
	argument = one_argument(argument, cmd);

	if (cmd[0] == '\0') {
		func_rnew_mail(usr);
		return;
	}

	if (!str_cmp(cmd, "?") || !str_cmp(cmd, "h")) {
		do_help(usr, "MAIL_DATA-INDEX");
		return;
	}

	if (!str_cmp(cmd, "l")) {
		func_list_mail(usr);
		return;
	}

	if (!str_cmp(cmd, "r")) {
		func_reply_mail(usr, argument);
		return;
	}

	if (!str_cmp(cmd, "d")) {
		func_delete_mail(usr, argument);
		return;
	}

	if (!str_cmp(cmd, "q")) {
		func_quit_mail(usr);
		return;
	}

	if (!str_cmp(cmd, "c")) {
		/* Compose: the rest of the line must name a valid, non-enemy user. */
		if (argument[0] == '\0') {
			syntax("[#Wc#x]ompose <user name>", usr);
		} else if (!is_user(argument)) {
			send_to_user("No such user.\n\r", usr);
		} else if (is_enemy(usr, argument)) {
			send_to_user("You can't sent mail to your enemies.\n\r", usr);
		} else {
			mail_attach(usr);
			/* Replace any recipient left in the current mail. */
			if (usr->pCurrentMail->to)
				free_string(usr->pCurrentMail->to);
			usr->pCurrentMail->to = str_dup(argument);
			EDIT_MODE(usr) = EDITOR_MAIL_SUBJECT;
		}
		return;
	}

	if (is_number(cmd)) {
		func_read_mail(usr, cmd);
		return;
	}

	send_to_user(
			"Unknown mail command, try '?' in order to show help.\n\r", usr);
}
Beispiel #3
0
/*
 * "mail" command entry point.
 *
 * With no argument: prints the user's message count, shows the mail help
 * index and switches the user into the mail editor (EDITOR_MAIL).
 * With a user name: validates the recipient and starts composing a mail to
 * them, switching the editor to the subject prompt (EDITOR_MAIL_SUBJECT).
 */
void do_mail(USER_DATA *usr, char *argument) {
	char recipient[INPUT];

	one_argument(argument, recipient);

	if (recipient[0] == '\0') {
		print_to_user(usr, "\n\rTotal messages: %-3d\n\r\n\r", count_mail(usr));
		do_help(usr, "MAIL_DATA-INDEX");
		EDIT_MODE(usr) = EDITOR_MAIL;
	} else if (!is_user(recipient)) {
		send_to_user("No such user.\n\r", usr);
	} else if (is_enemy(usr, recipient)) {
		send_to_user("You can't sent mail to your enemies.\n\r", usr);
	} else {
		mail_attach(usr);
		/* Replace any recipient left in the current mail. */
		if (usr->pCurrentMail->to)
			free_string(usr->pCurrentMail->to);
		usr->pCurrentMail->to = str_dup(recipient);
		EDIT_MODE(usr) = EDITOR_MAIL_SUBJECT;
	}
}
/**
 * For a user vertex, runs the COLLECT_TASK map-reduce over all of its edges
 * and stores the results on the vertex: the top 10 entries of the first
 * aggregate as top_rated and the top 5 entries of the second as top_pred.
 * Non-user vertices are left untouched.
 */
void collect_function (engine_type::context_type& context,
                       graph_type::vertex_type& vertex) {
  if (!is_user(vertex)) {
    return;
  }

  map_join_pair joined =
      context.map_reduce<map_join_pair>(COLLECT_TASK, ALL_EDGES);

  vertex.data().top_rated = joined.first.get_top_k(10);
  vertex.data().top_pred = joined.second.get_top_k(5);
}
Beispiel #5
0
/*
 * Reports to `usr` how many unread mails the user called `name` has.
 *
 * If get_user() finds the target, unread_mail() supplies the count.
 * Otherwise the target's mail file (MAIL_DIR + capitalized name) is read
 * record by record and messages whose stamp_time is newer than their
 * read_time are counted as new.
 */
void finger_mail(USER_DATA *usr, char *name) {
	MAIL_DATA *pMail;
	MAIL_DATA *fMailFirst;
	MAIL_DATA *fMailLast;
	USER_DATA *to;
	char buf[INPUT];
	FILE *fpMail;
	int new_count = 0;

	if (!(to = get_user(name))) {
		if (!is_user(name)) {
			bbs_bug("Finger_mail: No such user %s", name);
			send_to_user("ERROR: No such user.\n\r", usr);
			return;
		}
		fMailFirst = NULL;
		fMailLast = NULL;

		sprintf(buf, "%s%s", MAIL_DIR, capitalize(name));
		if (!(fpMail = fopen(buf, "r"))) {
			bbs_bug("Finger_mail: Could not open to read %s", buf);
			/*	    send_to_user("ERROR: Could not open mail file.\n\r", usr);
			 BAXTER */
			/* A missing mail file is reported as "no new messages". */
			send_to_user("Mail: No new messages.\n\r\n\r", usr);
			return;
		}

		while ((pMail = read_mail(fpMail)) != NULL)
			LINK(pMail, fMailFirst, fMailLast);

		/* All records are in memory; release the file handle. */
		fclose(fpMail);

		for (pMail = fMailFirst; pMail; pMail = pMail->next) {
			if (pMail->stamp_time > pMail->read_time)
				new_count++;
		}

		/*
		 * NOTE(review): the MAIL_DATA list built above is not released
		 * here; the matching free routine is not visible in this
		 * function - confirm whether it must be freed by the caller.
		 */

		if (new_count > 0)
			sprintf(buf, "Mail: %d new message%s.\n\r\n\r", new_count,
					new_count > 1 ? "s" : "");
		else
			sprintf(buf, "Mail: No new messages.\n\r\n\r");
		send_to_user(buf, usr);
		return;
	}

	if (unread_mail(to) > 0)
		sprintf(buf, "Mail: %d new message%s.\n\r\n\r", unread_mail(to),
				unread_mail(to) > 1 ? "s" : "");
	else
		sprintf(buf, "Mail: No new messages.\n\r\n\r");
	send_to_user(buf, usr);
	return;
}
Beispiel #6
0
/*
 * Releases all storage owned by object `oid` and clears its slot in
 * `objects`.
 *
 * Panics if the object does not exist or if it still has a location,
 * contents, a parent or a child.  If the object is a user it is also removed
 * from `all_users`.
 */
void
db_destroy_object(Objid oid)
{
    Object *victim = dbpriv_find_object(oid);
    Verbdef *cur, *following;
    int idx;

    db_priv_affected_callable_verb_lookup();

    if (!victim)
	panic("DB_DESTROY_OBJECT: Invalid object!");

    if (!(victim->location == NOTHING && victim->contents == NOTHING
	  && victim->parent == NOTHING && victim->child == NOTHING))
	panic("DB_DESTROY_OBJECT: Not a barren orphan!");

    if (is_user(oid)) {
	Var entry;

	entry.type = TYPE_OBJ;
	entry.v.obj = oid;
	all_users = setremove(all_users, entry);
    }
    free_str(victim->name);

    /* As an orphan, the only properties on this object are the ones
     * defined on it directly, so propdefs and propval have the same length.
     */
    idx = 0;
    while (idx < victim->propdefs.cur_length) {
	free_str(victim->propdefs.l[idx].name);
	free_var(victim->propval[idx].var);
	idx++;
    }
    if (victim->propval)
	myfree(victim->propval, M_PVAL);
    if (victim->propdefs.l)
	myfree(victim->propdefs.l, M_PROPDEF);

    /* Free each verb definition, remembering its successor first. */
    cur = victim->verbdefs;
    while (cur) {
	following = cur->next;
	if (cur->program)
	    free_program(cur->program);
	free_str(cur->name);
	myfree(cur, M_VERBDEF);
	cur = following;
    }

    myfree(objects[oid], M_OBJECT);
    objects[oid] = 0;
}
Beispiel #7
0
    /**
     * Grab pivot's adjacency list into memory.
     *
     * Copies every edge of the pivot user vertex v (neighbour id -> up_weight)
     * into a dense_adj, stores it in adjs at slot (v.id() - pivot_st) and adds
     * the edge count to grabbed_edges atomically.
     *
     * @param v  vertex that must be both a pivot and a user (asserted).
     * @return   the number of edges copied.
     */
    int load_edges_into_memory(graphchi_vertex<uint32_t, edge_data> &v) {
      assert(is_pivot(v.id()));
      assert(is_user(v.id()));
      // Check the slot index BEFORE writing through it, so an out-of-range
      // pivot is caught instead of corrupting memory first.
      assert(v.id() - pivot_st < adjs.size());

      int num_edges = v.num_edges();

      dense_adj dadj;
      for(int i=0; i<num_edges; i++) 
        set_new( dadj.edges, v.edge(i)->vertex_id(), v.edge(i)->get_data().up_weight);
      //dadj.ratings = zeros(N);
      dadj.vid = v.id();
      adjs[v.id() - pivot_st] = dadj;
      __sync_add_and_fetch(&grabbed_edges, num_edges /*edges_to_larger_id*/);
      return num_edges;
    }
    /**
     *  Vertex update function.
     *
     *  Even iterations: every vertex that is both a pivot and a user has its
     *  edges loaded into the adjacency container.
     *  Odd iterations: the vertex must be an item; each of its edges that
     *  leads to a pivot is handed to compute_ratings().
     */
    void update(graphchi_vertex<VertexDataType, edge_data> &v, graphchi_context &gcontext) {
        if (debug)
            printf("Entered iteration %d with %d\n", gcontext.iteration, is_item(v.id()) ? (v.id() - M + 1): v.id());

        const bool loading_pass = (gcontext.iteration % 2 == 0);

        if (loading_pass) {
            // Even pass: pull pivot users into memory, nothing else to do.
            if (adjcontainer->is_pivot(v.id()) && is_user(v.id())) {
                adjcontainer->load_edges_into_memory(v);
                if (debug)
                    printf("Loading pivot %d intro memory\n", v.id());
            }
            return;
        }

        // Odd pass: only item vertices are expected here.
        assert(is_item(v.id()));

        for (int e = 0; e < v.num_edges(); e++) {
            if (adjcontainer->is_pivot(v.edge(e)->vertex_id())) {
                if (debug)
                    printf("comparing user pivot %d to item %d\n", v.edge(e)->vertex_id()+1 , v.id() - M + 1);

                adjcontainer->compute_ratings(v, v.edge(e)->vertex_id(), v.edge(e)->get_data().up_weight);
                item_pairs_compared++;

                // Periodic progress report.
                if (item_pairs_compared % 1000000 == 0)
                    Rcpp::Rcout<< std::setw(10) << mytimer.current_time() << ")  " << std::setw(10) << item_pairs_compared << " pairs compared " << std::endl;
            }
        }
    }//end of update function
Beispiel #9
0
    /**
     *  Vertex update function.
     *
     *  Iteration 0: for each user with out-edges, resets the user's pvec and
     *               counts, per rated movie, how often each rating bin occurs
     *               (mov.bi[r]++).
     *  Iteration 1: for each vertex with in-edges, randomly initializes mov.w
     *               and turns the bin counts into log-frequencies.
     *  Later iterations: for each user with out-edges, computes and samples
     *               the hidden units from the observed ratings, predicts a
     *               rating per edge, recomputes the hidden units from the
     *               predictions, accumulates squared error into rmse_vec and
     *               updates the movie weights.
     *
     *  NOTE(review): `rbm_movie mov = latent_factors_inmem[...]` builds a
     *  local object; presumably rbm_movie holds pointers into the vertex_data
     *  so that writes to mov.bi / mov.w reach the shared state - confirm.
     */
    void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {


        if (gcontext.iteration == 0) {
            // Initialization pass 1: histogram of rating bins per movie.
            if (is_user(vertex.id()) && vertex.num_outedges() > 0) {
                vertex_data& user = latent_factors_inmem[vertex.id()];
                user.pvec = zeros(D*3);
                for(int e=0; e < vertex.num_outedges(); e++) {
                    rbm_movie mov = latent_factors_inmem[vertex.edge(e)->vertex_id()];
                    float observation = vertex.edge(e)->get_data();
                    // Map the rating onto its bin index.
                    int r = (int)(observation/rbm_scaling);
                    assert(r < rbm_bins);
                    mov.bi[r]++;
                }
            }
            return;
        } else if (gcontext.iteration == 1) {
            // Initialization pass 2: random weights and log bin frequencies.
            if (vertex.num_inedges() > 0) {
                rbm_movie mov = latent_factors_inmem[vertex.id()];
                setRand2(mov.w, D*rbm_bins, 0.001);
                for(int r = 0; r < rbm_bins; ++r) {
                    mov.bi[r] /= (double)vertex.num_inedges();
                    // 1E-9 keeps log() finite for bins that never occurred.
                    mov.bi[r] = log(1E-9 + mov.bi[r]);

                    if (mov.bi[r] > 1000) {
                        // NOTE(review): with assertions enabled the assert
                        // fires before the message below is printed.
                        assert(false);
                        Rcpp::Rcerr<<"Numerical overflow" <<std::endl;
                    }
                }
            }

            return; //done with initialization
        }
        //go over all user nodes
        if (is_user(vertex.id()) && vertex.num_outedges()) {
            vertex_data & user = latent_factors_inmem[vertex.id()];
            user.pvec = zeros(3*D);
            rbm_user usr(user);

            // v1[i] will hold the predicted rating bin for the i-th out-edge.
            vec v1 = zeros(vertex.num_outedges());
            //go over all ratings
            // Accumulate hidden-unit inputs from the weights of the observed bins.
            for(int e=0; e < vertex.num_outedges(); e++) {
                float observation = vertex.edge(e)->get_data();
                rbm_movie mov = latent_factors_inmem[vertex.edge(e)->vertex_id()];
                int r = (int)(observation / rbm_scaling);
                assert(r < rbm_bins);
                for(int k=0; k < D; k++) {
                    usr.h[k] += mov.w[D*r + k];
                    assert(!std::isnan(usr.h[k]));
                }
            }

            // Squash to probabilities and draw a binary sample h0 from them.
            for(int k=0; k < D; k++) {
                usr.h[k] = sigmoid(usr.h[k]);
                if (drand48() < usr.h[k])
                    usr.h0[k] = 1;
                else usr.h0[k] = 0;
            }


            // Predict a rating for every edge and record its bin in v1.
            int i = 0;
            double prediction;
            for(int e=0; e < vertex.num_outedges(); e++) {
                rbm_movie mov = latent_factors_inmem[vertex.edge(e)->vertex_id()];
                float observation = vertex.edge(e)->get_data();
                predict1(usr, mov, observation, prediction);
                int vi = (int)(prediction / rbm_scaling);
                v1[i] = vi;
                i++;
            }

            // Accumulate hidden-unit inputs again, this time from the predicted bins.
            i = 0;
            for(int e=0; e < vertex.num_outedges(); e++) {
                rbm_movie mov = latent_factors_inmem[vertex.edge(e)->vertex_id()];
                int r = (int)v1[i];
                for (int k=0; k< D; k++) {
                    usr.h1[k] += mov.w[r*D+k];
                }
                i++;
            }

            // Squash and sample h1 in place (h1 ends up holding 0/1 values).
            for (int k=0; k < D; k++) {
                usr.h1[k] = sigmoid(usr.h1[k]);
                if (drand48() < usr.h1[k])
                    usr.h1[k] = 1;
                else usr.h1[k] = 0;
            }

            // Accumulate squared error and update the weights: push the
            // observed bin towards h0 and the predicted bin away from h1.
            i = 0;
            for(int e=0; e < vertex.num_outedges(); e++) {
                rbm_movie mov = latent_factors_inmem[vertex.edge(e)->vertex_id()];
                float observation = vertex.edge(e)->get_data();
                // Shadows the `prediction` declared before the predict1 loop.
                double prediction;
                rbm_predict(user, mov, observation, prediction, NULL);
                double pui = prediction / rbm_scaling;
                double rui = observation / rbm_scaling;
                // Per-thread accumulator, indexed by the OpenMP thread number.
                rmse_vec[omp_get_thread_num()] += (pui - rui) * (pui - rui);
                //nn += 1.0;
                int vi0 = (int)(rui);
                int vi1 = (int)v1[i];
                for (int k = 0; k < D; k++) {
                    mov.w[D*vi0+k] += rbm_alpha * (usr.h0[k] - rbm_beta * mov.w[vi0*D+k]);
                    assert(!std::isnan(mov.w[D*vi0+k]));
                    mov.w[D*vi1+k] -= rbm_alpha * (usr.h1[k] + rbm_beta * mov.w[vi1*D+k]);
                    assert(!std::isnan(mov.w[D*vi1+k]));
                }
                i++;
            }
        }
    }
Beispiel #10
0
  /**
   *  Vertex update function.
   *
   *  Even iterations: pivot items load their edges into memory; every user
   *  that is linked to at least one pivot marks all of its neighbouring items
   *  in relevant_items.
   *  Odd iterations: each relevant vertex is compared against every relevant
   *  pivot in [pivot_st, pivot_en); the non-zero distances are sorted with
   *  Greater and at most K of them are written to this thread's output file.
   */
  void update(graphchi_vertex<VertexDataType, EdgeDataType> &v, graphchi_context &gcontext) {
    if (debug)
      printf("Entered iteration %d with %d\n", gcontext.iteration, v.id());
 
    /* even iteration numbers:
     * 1) load a subset of items into memory (pivots)
     * 2) Find which subset of items needs to compared to the users
     */
    if (gcontext.iteration % 2 == 0) {
      if (adjcontainer->is_pivot(v.id()) && is_item(v.id())){
        adjcontainer->load_edges_into_memory(v);         
        if (debug)
          printf("Loading pivot %dintro memory\n", v.id());
      }
      else if (is_user(v.id())){

        //in the zero iteration, if using AA/RA/PROB distance metric, initialize array
        //with node degrees 
        if (gcontext.iteration == 0 && (distance_metric == AA || distance_metric == RA || distance_metric == PROB)){
           latent_factors_inmem[v.id()].degree = v.num_edges();
        }

        //check if this user is connected to any pivot item
        bool has_pivot = false;
        int pivot = -1;
        for(int i=0; i<v.num_edges(); i++) {
          graphchi_edge<uint32_t> * e = v.edge(i);
          //assert(is_item(e->vertexid)); 
          if (adjcontainer->is_pivot(e->vertexid)) {
            has_pivot = true;
            pivot = e->vertexid;
            break;
          }
        }
        if (debug)
          printf("user %d is linked to pivot %d\n", v.id(), pivot);
        if (!has_pivot) //this user is not connected to any of the pivot item nodes and thus
          //it is not relevant at this point
          return; 

        //this user is connected to a pivot items, thus all connected items should be compared
        // relevant_items is indexed by (item id - M).
        for(int i=0; i<v.num_edges(); i++) {
          graphchi_edge<uint32_t> * e = v.edge(i);
          //assert(v.id() != e->vertexid);
          relevant_items[e->vertexid - M] = true;
        }
      }//is_user 

    } //end of iteration % 2 == 0
    /* odd iteration number:
     * 1) For any item connected to a pivot item
     *       compute itersection
     */
    else {
      // NOTE(review): v.id() - M assumes only item vertices (id >= M) reach
      // this branch - confirm against the caller.
      if (!relevant_items[v.id() - M]){
        if (debug)
          logstream(LOG_DEBUG)<<"Skipping item: " << v.id() << " since not relevant" << std::endl;
        return;
      }
      // Candidate (pivot id, distance) pairs for this item.
      std::vector<index_val> heap;

      

      for (vid_t i=adjcontainer->pivot_st; i< adjcontainer->pivot_en; i++){
        //for every metric except ASYM_COSINE, compare only to pivots with a smaller id than this item;
        //pivots that are not relevant are always skipped
        if ((distance_metric != ASYM_COSINE && i >= v.id()) || (!relevant_items[i-M]))
          continue;
        //for ASYM_COSINE only the comparison of an item with itself is skipped
        else if (distance_metric == ASYM_COSINE && i == v.id())
          continue;
        
        double dist = adjcontainer->calc_distance(v, i, distance_metric);
        item_pairs_compared++;
        if (item_pairs_compared % 10000000 == 0)
          logstream(LOG_INFO)<< std::setw(10) << mytimer.current_time() << ")  " << std::setw(10) << item_pairs_compared << " pairs compared " <<  std::setw(10) <<sum(written_pairs) << " written. " << std::endl;

        if (debug)
          printf("comparing %d to pivot %d distance is %g\n", i - M + 1, v.id() - M + 1, dist);
        // Zero distances are only counted, never written.
        if (dist != 0){
          heap.push_back(index_val(i, dist)); 
        }
        else zero_dist++;
      }
      sort(heap.begin(), heap.end(), &Greater);
      int thread_num = omp_get_thread_num();
      if (heap.size() < K)
        not_enough++;
      // Write at most K entries; ids are written 1-based relative to the first item.
      for (uint i=0; i< std::min(heap.size(), (size_t)K); i++){
          int rc = fprintf(out_files[thread_num], "%u %u %.12lg\n", v.id()-M+1, heap[i].index-M+1, (double)heap[i].val);//write item similarity to file
          written_pairs[omp_get_thread_num()]++;
         if (rc <= 0){
            perror("Failed to write output");
            logstream(LOG_FATAL)<<"Failed to write output to: file: " << training << omp_get_thread_num() << ".out" << std::endl;  
         }
      }
    }//end of iteration % 2 == 1
  }//end of update function
Beispiel #11
0
  /**
   * Computes the similarity between item v and an in-memory pivot item.
   *
   * Let a be the users linked to v, b the users linked to the pivot, and
   * I = intersection(a, b). Returns 0 when either item has fewer than
   * min_allowed_intersection users, or when |I| is below that threshold.
   *
   * Supported metrics:
   *   JACCARD      |I| / (|a| + |b| - |I|)
   *   AA           sum over k in I of 1 / log(degree(k))
   *   RA           sum over k in I of 1 / degree(k)
   *   ASYM_COSINE  |I| / (|a|^alpha * |b|^(1 - alpha))
   *   PROB         |I| / sum over k in b of p(k), where
   *                p(k) = 1 / (1 + C * ((N - degree(k)) / degree(k))
   *                                  * ((M - |a|) / |a|))
   *                and C is prob_sim_normalization_constant.
   *
   * Any other metric value trips an assert (and returns -1 when asserts are
   * compiled out).
   */
  double calc_distance(graphchi_vertex<uint32_t, uint32_t> &v, vid_t pivot, int distance_metric) {
    //assert(is_pivot(pivot));
    //assert(is_item(pivot) && is_item(v.id()));
    dense_adj &pivot_adj = adjs[pivot - pivot_st];
    int num_edges = v.num_edges();

    // Too few neighbours on either side: the intersection cannot be large enough.
    if (num_edges < min_allowed_intersection || pivot_adj.count < min_allowed_intersection)
      return 0;

    // Sorted copy of v's neighbour ids, as required by set_intersection.
    std::vector<vid_t> neighbours;
    neighbours.reserve(num_edges);
    for (int e = 0; e < num_edges; e++)
      neighbours.push_back(v.edge(e)->vertexid);
    sort(neighbours.begin(), neighbours.end());

    std::set<vid_t> common;
    std::set_intersection(
        pivot_adj.adjlist, pivot_adj.adjlist + pivot_adj.count,
        neighbours.begin(), neighbours.end(),
        std::inserter(common, common.begin()));

    double common_size = (double)common.size();
    // Not enough users rated both items, so the pair is not compared.
    if (common_size < (double)min_allowed_intersection)
      return 0;

    if (distance_metric == JACCARD) {
      uint size_a = v.num_edges();  // users linked to the current item
      uint size_b = acount(pivot);  // users linked to the pivot
      return common_size / (double)(size_a + size_b - common_size);
    }

    if (distance_metric == AA) {
      double dist = 0;
      std::set<vid_t>::iterator it = common.begin();
      while (it != common.end()) {
        vid_t user = *it;
        assert(latent_factors_inmem.size() == M && is_user(user));
        assert(latent_factors_inmem[user].degree > 0);
        dist += 1.0 / log(latent_factors_inmem[user].degree);
        ++it;
      }
      return dist;
    }

    if (distance_metric == RA) {
      double dist = 0;
      std::set<vid_t>::iterator it = common.begin();
      while (it != common.end()) {
        vid_t user = *it;
        assert(latent_factors_inmem.size() == M && is_user(user));
        assert(latent_factors_inmem[user].degree > 0);
        dist += 1.0 / latent_factors_inmem[user].degree;
        ++it;
      }
      return dist;
    }

    if (distance_metric == ASYM_COSINE) {
      uint size_a = v.num_edges();  // users linked to the current item
      uint size_b = acount(pivot);  // users linked to the pivot
      return common_size / (pow(size_a,asym_cosine_alpha) * pow(size_b,1-asym_cosine_alpha));
    }

    if (distance_metric == PROB) {
      double sum = 0;
      for (int idx = 0; idx < pivot_adj.count; idx++) {
        int node_k = pivot_adj.adjlist[idx];
        int degree_k = latent_factors_inmem[node_k].degree;
        assert(degree_k > 0);
        double p_k_1 = 1.0 / ( 1.0 + prob_sim_normalization_constant * ((N - degree_k)/(double)degree_k) * ((M - num_edges) / (double)num_edges));
        assert(p_k_1 > 0 && p_k_1 <= 1.0);
        sum += p_k_1;
      }
      return common_size / sum;
    }

    // Unknown metric.
    assert(false);

    return -1; //just to avoid warning
  }
Beispiel #12
0
  /*
   *  Vertex update function - computes the least square step
   *
   *  Only user vertices do work. For each user it:
   *    1) sums all entries of the y vectors of the items on its out-edges
   *       into the scalar sumY and sets ptemp[k] = pu[k] + rRuNum * sumY;
   *    2) for every edge, predicts the rating with time_svdpp_predict and
   *       applies gradient steps (rate tsp.lrate, regularizers tsp.beta /
   *       tsp.gamma) to the user, item and time-bin parameters, adding the
   *       squared error to rmse_vec;
   *    3) updates the y vector of every neighbouring item from the
   *       accumulated `sum`.
   */
  void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {

    //go over all user nodes
    if (is_user(vertex.id())){
      vertex_data & user = latent_factors_inmem[vertex.id()]; 
      time_svdpp_usr usr(user);

      unsigned int userRatings = vertex.num_outedges();
      // Normalization factor 1/sqrt(#ratings + 10).
      double rRuNum = 1/sqrt(userRatings+10);
      int dim = D;
      // Scalar: total of every component of every rated item's y vector.
      double sumY = 0.0;

      //go over all ratings
      for(int e=0; e < vertex.num_outedges(); e++) {
        uint pos = vertex.edge(e)->vertex_id();
        // Neighbours of a user must be item nodes, i.e. ids in [M, M+N).
        assert(pos >= M && pos < M+N);
        vertex_data & data = latent_factors_inmem[pos];
        time_svdpp_movie movie(data);
        Map<vec> y(movie.y, D);
        sumY += sum((const vec&)y); //y
      }

      for( int k=0; k<dim; ++k) {
        usr.ptemp[k] = usr.pu[k] + rRuNum * sumY; // pTemp = pu + rRuNum*sumY
      }
      // Per-dimension accumulator of eui * q[k]; this local vector hides the
      // sum() function used above for the rest of the scope.
      vec sum = zeros(dim);
      // NOTE(review): this loop and the last one use num_edges(), the first
      // loop uses num_outedges() - confirm they are equal for user vertices.
      for(int e=0; e < vertex.num_edges(); e++) {  
        //edge_data & edge = scope.edge_data(oedgeid);
        //float rui = edge.weight;
        float rui = vertex.edge(e)->get_data().weight; 
        uint t = (uint)(vertex.edge(e)->get_data().time - 1); // we assume time bins start from 1
        assert(t < M+N+K);
        vertex_data & data = latent_factors_inmem[vertex.edge(e)->vertex_id()];
        time_svdpp_movie mov(data);
        // NOTE(review): t is used directly as an index into
        // latent_factors_inmem; presumably edge times already include the
        // offset of the time nodes - confirm against the loader.
        time_svdpp_time time(latent_factors_inmem[t]);
        double pui = 0; 
        time_svdpp_predict(usr, mov, time, rui, pui);
        // Prediction error for this rating.
        double eui = rui - pui;
        *usr.bu += tsp.lrate*(eui - tsp.beta* *usr.bu);
        *mov.bi += tsp.lrate * (eui - tsp.beta* *mov.bi);

        for (int k = 0; k < dim; k++) {
          // Snapshot q[k] before it is modified; the later updates use the old value.
          double oldValue = mov.q[k];
          double userValue = usr.ptemp[k] + usr.pu[k] * time.pt[k];
          sum[k] += eui * mov.q[k];
          mov.q[k] += tsp.lrate * (eui * userValue - tsp.gamma*mov.q[k]);
          usr.ptemp[k] += tsp.lrate * ( eui * oldValue - tsp.gamma * usr.ptemp[k]);
          usr.p[k] += tsp.lrate * ( eui * oldValue - tsp.gamma*usr.p[k] );
          usr.pu[k] += tsp.lrate * (eui * oldValue  * time.pt[k] - tsp.gamma * usr.pu[k]);
          // Uses the pu[k] value that was just updated on the previous line.
          time.pt[k] += tsp.lrate * (eui * oldValue * usr.pu[k] - tsp.gamma * time.pt[k]);
          // x and z are updated from each other's previous values.
          double xOldValue = usr.x[k];
          double zOldValue = time.z[k];
          usr.x[k] += tsp.lrate * (eui * zOldValue - tsp.gamma * xOldValue);
          time.z[k] += tsp.lrate * (eui * xOldValue - tsp.gamma * zOldValue);
        }

         // Per-thread squared-error accumulator.
         rmse_vec[omp_get_thread_num()] += eui*eui;
      }

      // Propagate the accumulated gradient to the y vector of every neighbour.
      for(int e=0; e < vertex.num_edges(); e++) {  
        time_svdpp_movie mov = latent_factors_inmem[vertex.edge(e)->vertex_id()];
        for(int k=0;k<dim;k++){
          mov.y[k] += tsp.lrate * (rRuNum * sum[k]- tsp.gamma*mov.y[k]);
        }
      }

    }

  };
Beispiel #13
0
  /**
   * Computes the similarity between item v and an in-memory pivot item.
   *
   * Let a be the users linked to v, b the users linked to the pivot, and
   * I = intersection(a, b). Returns 0 when either item has fewer than
   * min_allowed_intersection users, when |I| is below that threshold, or
   * when the metric is not one of the supported values.
   *
   * Supported metrics:
   *   JACCARD      |I| / (|a| + |b| - |I|)
   *   AA           sum over k in I of 1 / log(degree(k))
   *   RA           sum over k in I of 1 / degree(k)
   *   ASYM_COSINE  |I| / (|a|^alpha * |b|^(1 - alpha))
   */
  double calc_distance(graphchi_vertex<uint32_t, uint32_t> &v, vid_t pivot, int distance_metric) {
    //assert(is_pivot(pivot));
    //assert(is_item(pivot) && is_item(v.id()));
    dense_adj &pivot_adj = adjs[pivot - pivot_st];
    int num_edges = v.num_edges();

    // Too few neighbours on either side: the intersection cannot be large enough.
    if (num_edges < min_allowed_intersection || pivot_adj.count < min_allowed_intersection)
      return 0;

    // Sorted copy of v's neighbour ids, as required by set_intersection.
    std::vector<vid_t> neighbours;
    neighbours.reserve(num_edges);
    for (int e = 0; e < num_edges; e++)
      neighbours.push_back(v.edge(e)->vertexid);
    sort(neighbours.begin(), neighbours.end());

    std::set<vid_t> common;
    std::set_intersection(
        pivot_adj.adjlist, pivot_adj.adjlist + pivot_adj.count,
        neighbours.begin(), neighbours.end(),
        std::inserter(common, common.begin()));

    double common_size = (double)common.size();
    // Not enough users rated both items, so the pair is not compared.
    if (common_size < (double)min_allowed_intersection)
      return 0;

    if (distance_metric == JACCARD) {
      uint size_a = v.num_edges();  // users linked to the current item
      uint size_b = acount(pivot);  // users linked to the pivot
      return common_size / (double)(size_a + size_b - common_size);
    }

    if (distance_metric == AA) {
      double dist = 0;
      std::set<vid_t>::iterator it = common.begin();
      while (it != common.end()) {
        vid_t user = *it;
        assert(latent_factors_inmem.size() == M && is_user(user));
        assert(latent_factors_inmem[user].degree > 0);
        dist += 1.0 / log(latent_factors_inmem[user].degree);
        ++it;
      }
      return dist;
    }

    if (distance_metric == RA) {
      double dist = 0;
      std::set<vid_t>::iterator it = common.begin();
      while (it != common.end()) {
        vid_t user = *it;
        assert(latent_factors_inmem.size() == M && is_user(user));
        assert(latent_factors_inmem[user].degree > 0);
        dist += 1.0 / latent_factors_inmem[user].degree;
        ++it;
      }
      return dist;
    }

    if (distance_metric == ASYM_COSINE) {
      uint size_a = v.num_edges();  // users linked to the current item
      uint size_b = acount(pivot);  // users linked to the pivot
      return common_size / (pow(size_a,asym_cosine_alpha) * pow(size_b,1-asym_cosine_alpha));
    }

    // Unknown metric.
    return 0;
  }
Beispiel #14
0
  /*
   *  Vertex update function - computes the least square step
   *
   *  Iteration 0: for every user, finds the out-edge with the latest time and
   *  stores that item's index (vertex id - M) in *user.last_item; users with
   *  no out-edges are reported with a warning.
   *  Later iterations: for every user and every rating, calls libfm_predict
   *  and applies a gradient step to the global mean, the four biases and the
   *  four factor vectors (user, item, time, last item), adding the squared
   *  error returned by libfm_predict to rmse_vec.
   */
  void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {

    if (gcontext.iteration == 0){
      if (is_user(vertex.id())) { //user node. find the last rated item and store it
        vertex_data_libfm user = latent_factors_inmem[vertex.id()];
        int max_time = 0;
        bool found = false;
        for(int e=0; e < vertex.num_outedges(); e++) {
          const edge_data & edge = vertex.edge(e)->get_data();
          // Compare in the same (offset-adjusted) units that max_time is
          // stored in; comparing the raw edge.time against the adjusted
          // maximum let an earlier edge overwrite the true latest one.
          const int edge_time = (int)(edge.time - time_offset);
          if (!found || edge_time >= max_time){
            max_time = edge_time;
            found = true;
            *user.last_item = vertex.edge(e)->vertex_id() - M;
          }
        }
      }
      if (is_user(vertex.id()) && vertex.num_outedges() == 0)
        logstream(LOG_WARNING)<<"Vertex: " << vertex.id() << " with no edges: " << std::endl;
      return;
    }

    //go over all user nodes
    if (is_user(vertex.id())){
      vertex_data_libfm user = latent_factors_inmem[vertex.id()];
      assert(*user.last_item >= 0 && *user.last_item < (int)N);
      // The "last item" nodes are stored after the M+N+K preceding nodes.
      vertex_data & last_item = latent_factors_inmem[M+N+K+(*user.last_item)];

      for(int e=0; e < vertex.num_outedges(); e++) {
        vertex_data_libfm movie(latent_factors_inmem[vertex.edge(e)->vertex_id()]);

        float rui = vertex.edge(e)->get_data().weight;
        double pui;
        vec sum;
        vertex_data & time = latent_factors_inmem[(int)vertex.edge(e)->get_data().time - time_offset];
        // Fills pui (prediction) and sum; returns the squared error.
        float sqErr = libfm_predict(user, movie, time, rui, pui, &sum);
        float eui = pui - rui;

        // Bias updates.
        globalMean -= libfm_rate * (eui + reg0 * globalMean);
        *user.bias -= libfm_rate * (eui + libfm_regw * *user.bias);
        *movie.bias -= libfm_rate * (eui + libfm_regw * *movie.bias);
        time.bias -= libfm_rate * (eui + libfm_regw * time.bias);
        assert(!std::isnan(time.bias));
        last_item.bias -= libfm_rate * (eui + libfm_regw * last_item.bias);

        // Factor updates: each gradient is sum[f] minus the factor's own value.
        for(int f = 0; f < D; f++){
          // user
          float grad = sum[f] - user.v[f];
          user.v[f] -= libfm_rate * (eui * grad + libfm_regv * user.v[f]);
          // item
          grad = sum[f] - movie.v[f];
          movie.v[f] -= libfm_rate * (eui * grad + libfm_regv * movie.v[f]);
          // time
          grad = sum[f] - time.pvec[f];
          time.pvec[f] -= libfm_rate * (eui * grad + libfm_regv * time.pvec[f]);
          // last item
          grad = sum[f] - last_item.pvec[f];
          last_item.pvec[f] -= libfm_rate * (eui * grad + libfm_regv * last_item.pvec[f]);

        }

        // Per-thread squared-error accumulator.
        rmse_vec[omp_get_thread_num()] += sqErr;
      }

    }

  };
Beispiel #15
0
/*
 * Moves object `old` into the lowest-numbered empty slot below it, if one
 * exists, and patches every reference to the old id that this function knows
 * about: the parent/child/sibling links, the location/contents links, the
 * all_users list, and the owner fields of all objects, verbs and properties.
 *
 * Returns the object's new id, or `old` unchanged when no lower slot is free.
 */
Objid
db_renumber_object(Objid old)
{
    Objid newbie;
    Object *o;

    db_priv_affected_callable_verb_lookup();

    /* Scan upwards for the first empty slot below `old`. */
    for (newbie = 0; newbie < old; newbie++) {
	if (objects[newbie] == 0) {
	    /* Change the identity of the object. */
	    o = objects[newbie] = objects[old];
	    objects[old] = 0;
	    objects[newbie]->id = newbie;

	    /* Fix up the parent/children hierarchy */
	    {
		Objid oid, *oidp;

		if (o->parent != NOTHING) {
		    /* Walk the parent's child list to the link that holds `old`. */
		    oidp = &objects[o->parent]->child;
		    while (*oidp != old && *oidp != NOTHING)
			oidp = &objects[*oidp]->sibling;
		    if (*oidp == NOTHING)
			panic("Object not in parent's children list");
		    *oidp = newbie;
		}
		/* Point every child of the moved object at its new id. */
		for (oid = o->child;
		     oid != NOTHING;
		     oid = objects[oid]->sibling)
		    objects[oid]->parent = newbie;
	    }

	    /* Fix up the location/contents hierarchy */
	    {
		Objid oid, *oidp;

		if (o->location != NOTHING) {
		    /* Walk the location's contents list to the link that holds `old`. */
		    oidp = &objects[o->location]->contents;
		    while (*oidp != old && *oidp != NOTHING)
			oidp = &objects[*oidp]->next;
		    if (*oidp == NOTHING)
			panic("Object not in location's contents list");
		    *oidp = newbie;
		}
		/* Point everything contained in the moved object at its new id. */
		for (oid = o->contents;
		     oid != NOTHING;
		     oid = objects[oid]->next)
		    objects[oid]->location = newbie;
	    }

	    /* Fix up the list of users, if necessary */
	    if (is_user(newbie)) {
		int i;

		/* Element 0 of the list holds its length; entries start at 1. */
		for (i = 1; i <= all_users.v.list[0].v.num; i++)
		    if (all_users.v.list[i].v.obj == old) {
			all_users.v.list[i].v.obj = newbie;
			break;
		    }
	    }
	    /* Fix the owners of verbs, properties and objects */
	    {
		Objid oid;

		for (oid = 0; oid < num_objects; oid++) {
		    /* This `o` shadows the outer `o` for the rest of the loop body. */
		    Object *o = objects[oid];
		    Verbdef *v;
		    Pval *p;
		    int i, count;

		    if (!o)
			continue;

		    /* An owner already equal to `newbie` refers to whatever
		     * used that id before this move, so it is reset to
		     * NOTHING; an owner equal to `old` follows the object. */
		    if (o->owner == newbie)
			o->owner = NOTHING;
		    else if (o->owner == old)
			o->owner = newbie;

		    for (v = o->verbdefs; v; v = v->next)
			if (v->owner == newbie)
			    v->owner = NOTHING;
			else if (v->owner == old)
			    v->owner = newbie;

		    count = dbpriv_count_properties(oid);
		    p = o->propval;
		    for (i = 0; i < count; i++)
			if (p[i].owner == newbie)
			    p[i].owner = NOTHING;
			else if (p[i].owner == old)
			    p[i].owner = newbie;
		}
	    }

	    return newbie;
	}
    }

    /* There are no recycled objects less than `old', so keep its number. */
    return old;
}
    /**
     * Add weighted ratings for each item linked to `item`.
     *
     * Every edge of `item` is examined. A neighbor is skipped when it is a
     * user, when the graph is directed and the edge has zero weight in the
     * relevant direction, or when the pivot is already connected to it. For
     * each remaining neighbor the pivot's accumulated rating is increased by
     * edge_weight * pow(weight, Q), where weight is the larger of the edge's
     * up and down weights. The update is made while holding the pivot's mutex.
     *
     * @param item         vertex whose edges are scanned
     * @param user_pivot   id of the pivot; is_pivot(user_pivot) must hold
     * @param edge_weight  factor applied to every contribution of this item;
     *                     must be non-zero unless allow_zeros is set
     * @return always 0; the results are accumulated in the pivot's ratings
     */
    double compute_ratings(graphchi_vertex<uint32_t, edge_data> &item, vid_t user_pivot, float edge_weight) {
        assert(is_pivot(user_pivot));
        if (!allow_zeros)
            assert(edge_weight != 0);
        else if (edge_weight == 0) {
            zero_edges++;
            return 0;
        }
        dense_adj &pivot_edges = adjs[user_pivot - pivot_st];

        if (!get_val(pivot_edges.edges, item.id())) {
            if (debug)
                Rcpp::Rcerr<<"Skipping item pivot pair since not connected!" << item.id() << std::endl;
            return 0;
        }

        int num_edges = item.num_edges();
        if (debug)
            Rcpp::Rcerr<<"Found " << num_edges << " edges from item : " << item.id() << std::endl;

        //if there are not enough neighboring user nodes to those two items there is no need
        //to actually count the intersection
        if (num_edges < min_allowed_intersection || nnz(pivot_edges.edges) < min_allowed_intersection) {
            if (debug)
                Rcpp::Rcerr<<"skipping item pivot pair since < min_allowed_intersection" << std::endl;
            return 0;
        }

        for(int i=0; i < num_edges; i++) {
            vid_t other_item = item.edge(i)->vertex_id();
            // NOTE(review): if vid_t and M are both unsigned this assertion can never fail - confirm intent
            assert(other_item - M >= 0);

            // direction of the edge relative to the vertex ids
            bool up = item.id() < other_item;
            if (debug)
                Rcpp::Rcerr<<"Checking now edge: " << other_item << std::endl;

            if (is_user(other_item)) {
                if (debug)
                    Rcpp::Rcerr<<"skipping edge to user " << other_item << std::endl;
                continue;
            }

            if (!undirected && ((up && item.edge(i)->get_data().up_weight == 0) ||
                                (!up && item.edge(i)->get_data().down_weight == 0))) {
                if (debug)
                    Rcpp::Rcerr<<"skipping edge with wrong direction to " << other_item << std::endl;
                continue;
            }

            if (get_val(pivot_edges.edges, other_item)) {
                if (debug)
                    Rcpp::Rcerr<<"skipping edge to " << other_item << " because alrteady connected to pivot" << std::endl;
                continue;
            }

            assert(get_val(pivot_edges.edges, item.id()) != 0);
            float weight = std::max(item.edge(i)->get_data().down_weight, item.edge(i)->get_data().up_weight);
            if (!allow_zeros)
                assert(weight != 0);
            else if (weight == 0) continue;

            // read-modify-write of the shared rating, done under the pivot's mutex
            pivot_edges.mymutex.lock();
            set_val(pivot_edges.ratings, other_item-M, get_val(pivot_edges.ratings, other_item-M) + edge_weight * pow(weight,Q));
            pivot_edges.mymutex.unlock();

            // NOTE(review): the end of this statement and the loop's closing brace were
            // lost to a "******" mask in the source; the user expression below is a
            // reconstruction - confirm against the upstream code.
            if (debug)
                Rcpp::Rcerr<<"Adding weight: " << weight << " to item: " << other_item-M+1 << " for user: " << user_pivot+1 << std::endl;
        }

        if (debug)
            Rcpp::Rcerr<<"Finished user pivot " << user_pivot << std::endl;
        return 0;
    }
Beispiel #17
0
  /**
   *  Vertex update function.
   *
   *  The work performed depends on gcontext.iteration:
   *   - iteration 0: every item vertex adds (edge value / N) to mean[] for
   *     the vertex at the other end of each of its edges.
   *   - iteration 1: every item vertex fills stddev[] with the sum of the
   *     squared result of minus(edge values, mean), divided by (M - 1).
   *     No square root is taken here.
   *   - later even iterations: pivot items are loaded into memory through
   *     adjcontainer; a user vertex that is linked to a relevant pivot item
   *     marks all of its neighbors as relevant.
   *   - later odd iterations: every relevant item is compared against the
   *     relevant pivots with a smaller id, and each non-zero distance is
   *     written to the output file of the calling OpenMP thread.
   */
  void update(CE_Graph_vertex<VertexDataType, EdgeDataType> &v, CE_Graph_context &gcontext) {
    if (debug)
      printf("Entered iteration %d with %d\n", gcontext.iteration, v.id());

    //in the zero iteration compute the mean
    if (gcontext.iteration == 0){
      if (is_item(v.id())){
        for(int i=0; i<v.num_edges(); i++) {
          CE_Graph_edge<float> * e = v.edge(i);
          vid_t user = e->vertexid;
          mean[user] += e->get_data() / (float)N;
        }
      }
    }
    //at the first iteration compute the stddev of each item from the mean
    else if (gcontext.iteration == 1){
      if (is_item(v.id())){
        dense_adj item_edges; 
        for(int i=0; i < v.num_edges(); i++) 
          set_new(item_edges.edges, v.edge(i)->vertexid, v.edge(i)->get_data());
        stddev[v.id() - M] = sum(minus(item_edges.edges, mean).array().pow(2)) / (M-1.0);
        if (debug)
          std::cout<<"item: " << v.id() - M+1 << " stddev: " << stddev[v.id() - M] << std::endl;
      }
    }

    /* even iteration numbers:
     * 1) load a subset of items into memory (pivots)
     * 2) Find which subset of items needs to compared to the users
     */
    else if (gcontext.iteration % 2 == 0) {
      if (adjcontainer->is_pivot(v.id()) && is_item(v.id())){
        adjcontainer->load_edges_into_memory(v);         
        if (debug)
          printf("Loading pivot %d intro memory\n", v.id());
      }
      else if (is_user(v.id())){
        //check if this user is connected to any pivot item
        bool has_pivot = false;
        int pivot = -1;
        for(int i=0; i<v.num_edges(); i++) {
          CE_Graph_edge<float> * e = v.edge(i);
          //assert(is_item(e->vertexid)); 
          if (adjcontainer->is_pivot(e->vertexid) && relevant_items[e->vertexid-M]) {
            has_pivot = true;
            pivot = e->vertexid;
            break;
          }
        }
        if (debug)
          printf("user %d is linked to pivot %d\n", v.id(), pivot);
        if (!has_pivot) //this user is not connected to any of the pivot item nodes and thus
          //it is not relevant at this point
          return; 

        //this user is connected to a pivot items, thus all connected items should be compared
        for(int i=0; i<v.num_edges(); i++) {
          CE_Graph_edge<float> * e = v.edge(i);
          //assert(v.id() != e->vertexid);
          relevant_items[e->vertexid - M] = true;
        }
      }//is_user 
    } //end of iteration % 2 == 0
    /* odd iteration number:
     * 1) For any item connected to a pivot item
     *       compute itersection
     */
    else {
      //only item vertices own a slot in relevant_items; checking is_item first keeps
      //v.id() - M from being used as an index for any other vertex, matching the
      //guards on the other "- M" accesses in this function
      //NOTE(review): assumes item ids start at M - confirm against is_item()
      if (!is_item(v.id()) || !relevant_items[v.id() - M]){
        return;
      }

      for (vid_t i=adjcontainer->pivot_st; i< adjcontainer->pivot_en; i++){
        //since metric is symmetric, compare only to pivots which are smaller than this item id
        if (i >= v.id() || (!relevant_items[i-M]))
          continue;

        double dist = adjcontainer->calc_distance(v, i, distance_metric);
        item_pairs_compared++;
        //progress report every million compared pairs
        if (item_pairs_compared % 1000000 == 0)
          logstream(LOG_INFO)<< std::setw(10) << mytimer.current_time() << ")  " << std::setw(10) << item_pairs_compared << " pairs compared " << std::endl;
        if (debug)
          printf("comparing %d to pivot %d distance is %lg\n", i - M + 1, v.id() - M + 1, dist);
        if (dist != 0){
          fprintf(out_files[omp_get_thread_num()], "%u %u %.12lg\n", v.id()-M+1, i-M+1, (double)dist);//write item similarity to file
          //where the output format is: 
          //[item A] [ item B ] [ distance ] 
          written_pairs++;
        }
      }
    }//end of iteration % 2 == 1
  }//end of update function