/**
 * Serialize the recommendation lists of one vertex as a single text line.
 *
 * Returns "" for vertices that are not users, for users whose top_rated
 * list has fewer than 10 entries, and for users whose top_pred list is
 * empty. Otherwise the line is
 *   <vertex id> <rated,rated,...> <pred,pred,...>\n
 * where every entry is rendered with pair2str().
 */
std::string save_vertex(const graph_type::vertex_type& v) {
  typedef std::vector<std::pair<double, graphlab::vertex_id_type> > scored_list;

  if (!is_user(v)) return "";

  const scored_list& rated = v.data().top_rated;
  const scored_list& predicted = v.data().top_pred;
  if (rated.size() < 10 || predicted.empty()) return "";

  std::stringstream line;
  line << v.id() << " ";

  // Comma-separated list of the top rated entries.
  for (size_t idx = 0; idx < rated.size(); ++idx) {
    if (idx != 0) line << ",";
    line << pair2str(rated[idx]);
  }

  line << " ";

  // Comma-separated list of the top predicted entries.
  for (size_t idx = 0; idx < predicted.size(); ++idx) {
    if (idx != 0) line << ",";
    line << pair2str(predicted[idx]);
  }

  line << "\n";
  return line.str();
}
/*
 * Interpret one line of input typed while the user is in mail mode.
 *
 * Leading whitespace is skipped, smash_tilde() is applied to the rest,
 * usr->timer is reset to 0 and the first word selects the action:
 *
 *   (empty)    func_rnew_mail()
 *   ? or h     show the "MAIL_DATA-INDEX" help entry
 *   l          func_list_mail()
 *   r <args>   func_reply_mail()
 *   d <args>   func_delete_mail()
 *   q          func_quit_mail()
 *   c <name>   attach a new mail addressed to <name> and switch the editor
 *              to EDITOR_MAIL_SUBJECT (rejected for unknown users and for
 *              users is_enemy() reports as enemies)
 *   <number>   func_read_mail()
 *
 * Any other word prints a hint pointing at '?'.
 */
void edit_mail_mode(USER_DATA *usr, char *argument) {
    char arg[INPUT];

    /*
     * isspace() is only defined for values representable as unsigned char
     * (or EOF). A plain char may be negative for 8-bit input, so convert
     * before classifying.
     */
    while (isspace((unsigned char) *argument))
        argument++;

    smash_tilde(argument);
    usr->timer = 0;
    argument = one_argument(argument, arg);

    if (arg[0] == '\0') {
        func_rnew_mail(usr);
        return;
    } else if (!str_cmp(arg, "?") || !str_cmp(arg, "h")) {
        do_help(usr, "MAIL_DATA-INDEX");
        return;
    } else if (!str_cmp(arg, "l")) {
        func_list_mail(usr);
        return;
    } else if (!str_cmp(arg, "r")) {
        func_reply_mail(usr, argument);
        return;
    } else if (!str_cmp(arg, "d")) {
        func_delete_mail(usr, argument);
        return;
    } else if (!str_cmp(arg, "q")) {
        func_quit_mail(usr);
        return;
    } else if (!str_cmp(arg, "c")) {
        /* After one_argument(), argument holds the remainder: the recipient. */
        if (argument[0] == '\0') {
            syntax("[#Wc#x]ompose <user name>", usr);
            return;
        }
        if (!is_user(argument)) {
            send_to_user("No such user.\n\r", usr);
            return;
        }
        if (is_enemy(usr, argument)) {
            send_to_user("You can't sent mail to your enemies.\n\r", usr);
            return;
        }

        mail_attach(usr);
        /* Release any recipient already stored before overwriting it. */
        if (usr->pCurrentMail->to)
            free_string(usr->pCurrentMail->to);
        usr->pCurrentMail->to = str_dup(argument);
        EDIT_MODE(usr) = EDITOR_MAIL_SUBJECT;
        return;
    } else if (is_number(arg)) {
        func_read_mail(usr, arg);
        return;
    } else {
        send_to_user(
            "Unknown mail command, try '?' in order to show help.\n\r", usr);
        return;
    }
}
/*
 * "mail" command.
 *
 * With no argument: print the user's message count, show the
 * "MAIL_DATA-INDEX" help entry and put the editor into EDITOR_MAIL.
 * With a user name: attach a new mail addressed to that user and put the
 * editor into EDITOR_MAIL_SUBJECT. Unknown users and users that is_enemy()
 * reports as enemies are rejected with a message.
 */
void do_mail(USER_DATA *usr, char *argument) {
    char recipient[INPUT];

    one_argument(argument, recipient);

    if (*recipient == '\0') {
        print_to_user(usr, "\n\rTotal messages: %-3d\n\r\n\r", count_mail(usr));
        do_help(usr, "MAIL_DATA-INDEX");
        EDIT_MODE(usr) = EDITOR_MAIL;
    } else if (!is_user(recipient)) {
        send_to_user("No such user.\n\r", usr);
    } else if (is_enemy(usr, recipient)) {
        send_to_user("You can't sent mail to your enemies.\n\r", usr);
    } else {
        mail_attach(usr);

        /* Drop a recipient that is already set before storing the new one. */
        if (usr->pCurrentMail->to)
            free_string(usr->pCurrentMail->to);
        usr->pCurrentMail->to = str_dup(recipient);

        EDIT_MODE(usr) = EDITOR_MAIL_SUBJECT;
    }
}
/**
 * For a user vertex, run the COLLECT_TASK map-reduce over all of its edges
 * and store the results on the vertex: the 10 best entries of the first
 * aggregate go to top_rated, the 5 best of the second go to top_pred.
 * Vertices that are not users are left untouched.
 */
void collect_function (engine_type::context_type& context,
                       graph_type::vertex_type& vertex) {
  if (!is_user(vertex)) return;

  map_join_pair joined =
      context.map_reduce<map_join_pair>(COLLECT_TASK, ALL_EDGES);

  vertex.data().top_rated = joined.first.get_top_k(10);
  vertex.data().top_pred = joined.second.get_top_k(5);
}
/*
 * Tell `usr` how many new (unread) messages the user called `name` has.
 *
 * If get_user() finds the target, unread_mail() supplies the count.
 * Otherwise the target's mail file (MAIL_DIR + capitalized name) is read
 * with read_mail() and a message counts as new when its stamp_time is
 * later than its read_time. A mail file that cannot be opened is logged
 * with bbs_bug() and reported to `usr` as "No new messages".
 */
void finger_mail(USER_DATA *usr, char *name) {
    MAIL_DATA *pMail;
    MAIL_DATA *fMailFirst = NULL;
    MAIL_DATA *fMailLast = NULL;
    USER_DATA *to;
    char buf[INPUT];
    FILE *fpMail;
    int new_count = 0;

    to = get_user(name);
    if (to) {
        new_count = unread_mail(to);
    } else {
        if (!is_user(name)) {
            bbs_bug("Finger_mail: No such user %s", name);
            send_to_user("ERROR: No such user.\n\r", usr);
            return;
        }

        /* Bounded formatting: the path must not overrun buf. */
        snprintf(buf, sizeof(buf), "%s%s", MAIL_DIR, capitalize(name));

        fpMail = fopen(buf, "r");
        if (!fpMail) {
            bbs_bug("Finger_mail: Could not open to read %s", buf);
            send_to_user("Mail: No new messages.\n\r\n\r", usr);
            return;
        }

        while ((pMail = read_mail(fpMail)) != NULL)
            LINK(pMail, fMailFirst, fMailLast);

        /*
         * Close the stream once everything has been read; it was previously
         * left open on every call.
         * NOTE(review): assumes read_mail() does not close the stream itself
         * -- confirm against its definition.
         */
        fclose(fpMail);

        for (pMail = fMailFirst; pMail; pMail = pMail->next) {
            if (pMail->stamp_time > pMail->read_time)
                new_count++;
        }
        /*
         * NOTE(review): the MAIL_DATA list built above is not released here;
         * the matching free routine is not visible from this function.
         */
    }

    if (new_count > 0)
        snprintf(buf, sizeof(buf), "Mail: %d new message%s.\n\r\n\r",
                 new_count, new_count > 1 ? "s" : "");
    else
        snprintf(buf, sizeof(buf), "Mail: No new messages.\n\r\n\r");

    send_to_user(buf, usr);
}
/*
 * Release every resource owned by object `oid` and clear its slot in
 * objects[].
 *
 * Panics if the object does not exist or if it still has a location,
 * contents, a parent or a child. An object for which is_user() is true is
 * also removed from all_users.
 */
void db_destroy_object(Objid oid)
{
    Object *victim = dbpriv_find_object(oid);
    Verbdef *verb, *following;
    int slot;

    db_priv_affected_callable_verb_lookup();

    if (!victim)
        panic("DB_DESTROY_OBJECT: Invalid object!");

    if (!(victim->location == NOTHING && victim->contents == NOTHING
          && victim->parent == NOTHING && victim->child == NOTHING))
        panic("DB_DESTROY_OBJECT: Not a barren orphan!");

    if (is_user(oid)) {
        Var entry;

        entry.type = TYPE_OBJ;
        entry.v.obj = oid;
        all_users = setremove(all_users, entry);
    }

    free_str(victim->name);

    /* An orphan only carries the properties defined directly on it, so the
     * definition array and the value array have the same length. */
    slot = 0;
    while (slot < victim->propdefs.cur_length) {
        free_str(victim->propdefs.l[slot].name);
        free_var(victim->propval[slot].var);
        slot++;
    }
    if (victim->propval)
        myfree(victim->propval, M_PVAL);
    if (victim->propdefs.l)
        myfree(victim->propdefs.l, M_PROPDEF);

    /* Walk the verb list, remembering the successor before the node goes. */
    verb = victim->verbdefs;
    while (verb) {
        following = verb->next;
        if (verb->program)
            free_program(verb->program);
        free_str(verb->name);
        myfree(verb, M_VERBDEF);
        verb = following;
    }

    myfree(objects[oid], M_OBJECT);
    objects[oid] = 0;
}
/**
 * Copy a pivot user's adjacency list into the in-memory pivot table.
 *
 * Each edge of `v` is stored in a dense_adj keyed by the neighbour's vertex
 * id, with the edge's up_weight as the value. The result is placed in
 * adjs[v.id() - pivot_st] and the edge count is atomically added to
 * grabbed_edges.
 *
 * @param v  vertex to load; asserted to be both a pivot and a user.
 * @return   the number of edges of `v`.
 */
int load_edges_into_memory(graphchi_vertex<uint32_t, edge_data> &v) {
  assert(is_pivot(v.id()));
  assert(is_user(v.id()));
  // Validate the destination slot BEFORE writing to it; checking afterwards
  // would only fire once adjs had already been indexed out of range.
  assert(v.id() - pivot_st < adjs.size());

  int num_edges = v.num_edges();

  dense_adj dadj;
  for (int i = 0; i < num_edges; i++)
    set_new(dadj.edges, v.edge(i)->vertex_id(), v.edge(i)->get_data().up_weight);
  //dadj.ratings = zeros(N);
  dadj.vid = v.id();

  adjs[v.id() - pivot_st] = dadj;

  __sync_add_and_fetch(&grabbed_edges, num_edges /*edges_to_larger_id*/);
  return num_edges;
}
/** * Vertex update function. */ void update(graphchi_vertex<VertexDataType, edge_data> &v, graphchi_context &gcontext) { if (debug) printf("Entered iteration %d with %d\n", gcontext.iteration, is_item(v.id()) ? (v.id() - M + 1): v.id()); /* Even iteration numbers: * 1) load a subset of users into memory (pivots) * 2) Find which subset of items is connected to the users */ if (gcontext.iteration % 2 == 0) { if (adjcontainer->is_pivot(v.id()) && is_user(v.id())) { adjcontainer->load_edges_into_memory(v); if (debug) printf("Loading pivot %d intro memory\n", v.id()); } } /* odd iteration number: * 1) For any item connected to a pivot item * compute itersection */ else { assert(is_item(v.id())); for (int i=0; i< v.num_edges(); i++) { if (!adjcontainer->is_pivot(v.edge(i)->vertex_id())) continue; if (debug) printf("comparing user pivot %d to item %d\n", v.edge(i)->vertex_id()+1 , v.id() - M + 1); adjcontainer->compute_ratings(v, v.edge(i)->vertex_id(), v.edge(i)->get_data().up_weight); item_pairs_compared++; if (item_pairs_compared % 1000000 == 0) Rcpp::Rcout<< std::setw(10) << mytimer.current_time() << ") " << std::setw(10) << item_pairs_compared << " pairs compared " << std::endl; } }//end of iteration % 2 == 1 }//end of update function
/** * Vertex update function. */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { if (gcontext.iteration == 0) { if (is_user(vertex.id()) && vertex.num_outedges() > 0) { vertex_data& user = latent_factors_inmem[vertex.id()]; user.pvec = zeros(D*3); for(int e=0; e < vertex.num_outedges(); e++) { rbm_movie mov = latent_factors_inmem[vertex.edge(e)->vertex_id()]; float observation = vertex.edge(e)->get_data(); int r = (int)(observation/rbm_scaling); assert(r < rbm_bins); mov.bi[r]++; } } return; } else if (gcontext.iteration == 1) { if (vertex.num_inedges() > 0) { rbm_movie mov = latent_factors_inmem[vertex.id()]; setRand2(mov.w, D*rbm_bins, 0.001); for(int r = 0; r < rbm_bins; ++r) { mov.bi[r] /= (double)vertex.num_inedges(); mov.bi[r] = log(1E-9 + mov.bi[r]); if (mov.bi[r] > 1000) { assert(false); Rcpp::Rcerr<<"Numerical overflow" <<std::endl; } } } return; //done with initialization } //go over all user nodes if (is_user(vertex.id()) && vertex.num_outedges()) { vertex_data & user = latent_factors_inmem[vertex.id()]; user.pvec = zeros(3*D); rbm_user usr(user); vec v1 = zeros(vertex.num_outedges()); //go over all ratings for(int e=0; e < vertex.num_outedges(); e++) { float observation = vertex.edge(e)->get_data(); rbm_movie mov = latent_factors_inmem[vertex.edge(e)->vertex_id()]; int r = (int)(observation / rbm_scaling); assert(r < rbm_bins); for(int k=0; k < D; k++) { usr.h[k] += mov.w[D*r + k]; assert(!std::isnan(usr.h[k])); } } for(int k=0; k < D; k++) { usr.h[k] = sigmoid(usr.h[k]); if (drand48() < usr.h[k]) usr.h0[k] = 1; else usr.h0[k] = 0; } int i = 0; double prediction; for(int e=0; e < vertex.num_outedges(); e++) { rbm_movie mov = latent_factors_inmem[vertex.edge(e)->vertex_id()]; float observation = vertex.edge(e)->get_data(); predict1(usr, mov, observation, prediction); int vi = (int)(prediction / rbm_scaling); v1[i] = vi; i++; } i = 0; for(int e=0; e < vertex.num_outedges(); e++) { rbm_movie mov = 
latent_factors_inmem[vertex.edge(e)->vertex_id()]; int r = (int)v1[i]; for (int k=0; k< D; k++) { usr.h1[k] += mov.w[r*D+k]; } i++; } for (int k=0; k < D; k++) { usr.h1[k] = sigmoid(usr.h1[k]); if (drand48() < usr.h1[k]) usr.h1[k] = 1; else usr.h1[k] = 0; } i = 0; for(int e=0; e < vertex.num_outedges(); e++) { rbm_movie mov = latent_factors_inmem[vertex.edge(e)->vertex_id()]; float observation = vertex.edge(e)->get_data(); double prediction; rbm_predict(user, mov, observation, prediction, NULL); double pui = prediction / rbm_scaling; double rui = observation / rbm_scaling; rmse_vec[omp_get_thread_num()] += (pui - rui) * (pui - rui); //nn += 1.0; int vi0 = (int)(rui); int vi1 = (int)v1[i]; for (int k = 0; k < D; k++) { mov.w[D*vi0+k] += rbm_alpha * (usr.h0[k] - rbm_beta * mov.w[vi0*D+k]); assert(!std::isnan(mov.w[D*vi0+k])); mov.w[D*vi1+k] -= rbm_alpha * (usr.h1[k] + rbm_beta * mov.w[vi1*D+k]); assert(!std::isnan(mov.w[D*vi1+k])); } i++; } } }
/** * Vertex update function. */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &v, graphchi_context &gcontext) { if (debug) printf("Entered iteration %d with %d\n", gcontext.iteration, v.id()); /* even iteration numbers: * 1) load a subset of items into memory (pivots) * 2) Find which subset of items needs to compared to the users */ if (gcontext.iteration % 2 == 0) { if (adjcontainer->is_pivot(v.id()) && is_item(v.id())){ adjcontainer->load_edges_into_memory(v); if (debug) printf("Loading pivot %dintro memory\n", v.id()); } else if (is_user(v.id())){ //in the zero iteration, if using AA/RA/PROB distance metric, initialize array //with node degrees if (gcontext.iteration == 0 && (distance_metric == AA || distance_metric == RA || distance_metric == PROB)){ latent_factors_inmem[v.id()].degree = v.num_edges(); } //check if this user is connected to any pivot item bool has_pivot = false; int pivot = -1; for(int i=0; i<v.num_edges(); i++) { graphchi_edge<uint32_t> * e = v.edge(i); //assert(is_item(e->vertexid)); if (adjcontainer->is_pivot(e->vertexid)) { has_pivot = true; pivot = e->vertexid; break; } } if (debug) printf("user %d is linked to pivot %d\n", v.id(), pivot); if (!has_pivot) //this user is not connected to any of the pivot item nodes and thus //it is not relevant at this point return; //this user is connected to a pivot items, thus all connected items should be compared for(int i=0; i<v.num_edges(); i++) { graphchi_edge<uint32_t> * e = v.edge(i); //assert(v.id() != e->vertexid); relevant_items[e->vertexid - M] = true; } }//is_user } //iteration % 2 = 1 /* odd iteration number: * 1) For any item connected to a pivot item * compute itersection */ else { if (!relevant_items[v.id() - M]){ if (debug) logstream(LOG_DEBUG)<<"Skipping item: " << v.id() << " since not relevant" << std::endl; return; } std::vector<index_val> heap; for (vid_t i=adjcontainer->pivot_st; i< adjcontainer->pivot_en; i++){ //if JACCARD which is symmetric, compare only to pivots 
which are smaller than this item id if ((distance_metric != ASYM_COSINE && i >= v.id()) || (!relevant_items[i-M])) continue; else if (distance_metric == ASYM_COSINE && i == v.id()) continue; double dist = adjcontainer->calc_distance(v, i, distance_metric); item_pairs_compared++; if (item_pairs_compared % 10000000 == 0) logstream(LOG_INFO)<< std::setw(10) << mytimer.current_time() << ") " << std::setw(10) << item_pairs_compared << " pairs compared " << std::setw(10) <<sum(written_pairs) << " written. " << std::endl; if (debug) printf("comparing %d to pivot %d distance is %g\n", i - M + 1, v.id() - M + 1, dist); if (dist != 0){ heap.push_back(index_val(i, dist)); } else zero_dist++; } sort(heap.begin(), heap.end(), &Greater); int thread_num = omp_get_thread_num(); if (heap.size() < K) not_enough++; for (uint i=0; i< std::min(heap.size(), (size_t)K); i++){ int rc = fprintf(out_files[thread_num], "%u %u %.12lg\n", v.id()-M+1, heap[i].index-M+1, (double)heap[i].val);//write item similarity to file written_pairs[omp_get_thread_num()]++; if (rc <= 0){ perror("Failed to write output"); logstream(LOG_FATAL)<<"Failed to write output to: file: " << training << omp_get_thread_num() << ".out" << std::endl; } } }//end of iteration % 2 == 1 }//end of update function
/** * calc distance between two items. * Let a be all the users rated item 1 * Let b be all the users rated item 2 * Let intersection (a,b) be the number of users rated both items * Let size(a) be the number of users rated item 1 * Let size(b) be the number of users rated item 2 * * Only for prob similarity: * Let M be the total number of users * Let N be the total number of iterms * Let L be the total number of training ratings * * 0) Using Jackard index: * Dist_12 = intersection(a,b) / (size(a) + size(b) - size(intersection(a,b)) * * 1) Using AA index: * Dist_12 = sum_user k in intersection(a,b) [ 1 / log(degree(k)) ] * * 2) Using RA index: * Dist_12 = sum_user k in intersection(a,b) [ 1 / degree(k) ] * * 3) Using Asym Cosine: * Dist_12 = intersection(a,b) / size(a)^alpha * size(b)^(1-alpha) * * 4) Using prob similarity: * Dist_12 = intersection(a,b) / [ sum(user k in b) p(k,1) ] * where p(k,1) = 1 / [ 1 + (L / (MN-L)) ((N - degree(k))/degree(K)) * ((M - degree(1)) / degree(1)) ] * */ double calc_distance(graphchi_vertex<uint32_t, uint32_t> &v, vid_t pivot, int distance_metric) { //assert(is_pivot(pivot)); //assert(is_item(pivot) && is_item(v.id())); dense_adj &pivot_edges = adjs[pivot - pivot_st]; int num_edges = v.num_edges(); //if there are not enough neighboring user nodes to those two items there is no need //to actually count the intersection if (num_edges < min_allowed_intersection || pivot_edges.count < min_allowed_intersection) return 0; std::vector<vid_t> edges; edges.resize(num_edges); for(int i=0; i < num_edges; i++) { vid_t other_vertex = v.edge(i)->vertexid; edges[i] = other_vertex; } sort(edges.begin(), edges.end()); std::set<vid_t> intersection; std::set_intersection( pivot_edges.adjlist, pivot_edges.adjlist + pivot_edges.count, edges.begin(), edges.end(), std::inserter(intersection, intersection.begin())); double intersection_size = (double)intersection.size(); //not enough user nodes rated both items, so the pairs of items are not compared. 
if (intersection_size < (double)min_allowed_intersection) return 0; if (distance_metric == JACCARD){ uint set_a_size = v.num_edges(); //number of users connected to current item uint set_b_size = acount(pivot); //number of users connected to current pivot return intersection_size / (double)(set_a_size + set_b_size - intersection_size); //compute the distance } else if (distance_metric == AA){ double dist = 0; for (std::set<vid_t>::iterator i= intersection.begin() ; i != intersection.end(); i++){ vid_t user = *i; assert(latent_factors_inmem.size() == M && is_user(user)); assert(latent_factors_inmem[user].degree > 0); dist += 1.0 / log(latent_factors_inmem[user].degree); } return dist; } else if (distance_metric == RA){ double dist = 0; for (std::set<vid_t>::iterator i= intersection.begin() ; i != intersection.end(); i++){ vid_t user = *i; assert(latent_factors_inmem.size() == M && is_user(user)); assert(latent_factors_inmem[user].degree > 0); dist += 1.0 / latent_factors_inmem[user].degree; } return dist; } /* 3) Using Asym Cosine: * Dist_12 = intersection(a,b) / size(a)^alpha * size(b)^(1-alpha) */ else if (distance_metric == ASYM_COSINE){ uint set_a_size = v.num_edges(); //number of users connected to current item uint set_b_size = acount(pivot); //number of users connected to current pivot return intersection_size / (pow(set_a_size,asym_cosine_alpha) * pow(set_b_size,1-asym_cosine_alpha)); } /* 4) Using prob similarity: * Dist_12 = intersection(a,b) / [ sum(user k in b) p(k,1) ] * where p(k,1) = 1 / [ 1 + (L / (MN-L)) ((N - degree(k))/degree(K)) * ((M - degree(1)) / degree(1)) ] */ else if (distance_metric == PROB){ double sum = 0; for(int i=0; i<pivot_edges.count; i++) { int node_k = pivot_edges.adjlist[i]; int degree_k = latent_factors_inmem[node_k].degree; assert(degree_k > 0); double p_k_1 = 1.0 / ( 1.0 + prob_sim_normalization_constant * ((N - degree_k)/(double)degree_k) * ((M - num_edges) / (double)num_edges)); assert(p_k_1 > 0 && p_k_1 <= 1.0); sum += 
p_k_1; } return intersection_size / sum; } else { assert(false); } return -1; //just to avoid warning }
/*
 * Vertex update function.
 *
 * Only user vertices do work. For a user the function
 *   1. adds up every component of the y vector of each rated item into the
 *      scalar sumY and sets ptemp[k] = pu[k] + rRuNum * sumY, where
 *      rRuNum = 1 / sqrt(#out-edges + 10);
 *   2. for each rating, predicts it with time_svdpp_predict() and adjusts
 *      the user, item and time-bin parameters in place using the error
 *      eui = rui - pui (step size tsp.lrate, shrinkage tsp.beta /
 *      tsp.gamma), adding eui^2 to this thread's rmse_vec slot;
 *   3. adjusts the y vector of every rated item from the per-dimension
 *      sums gathered in step 2.
 *
 * The statements inside the per-rating loop read values that earlier
 * statements in the same loop body have just modified, so their order
 * matters.
 */
void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) {
  //go over all user nodes
  if (is_user(vertex.id())){
    vertex_data & user = latent_factors_inmem[vertex.id()];
    // NOTE(review): usr presumably exposes views into `user`'s storage
    // (bu, p, pu, x, ptemp) -- confirm against time_svdpp_usr.
    time_svdpp_usr usr(user);
    unsigned int userRatings = vertex.num_outedges();
    // Normalization factor that shrinks as the number of ratings grows.
    double rRuNum = 1/sqrt(userRatings+10);
    int dim = D;
    double sumY = 0.0;

    //go over all ratings
    for(int e=0; e < vertex.num_outedges(); e++) {
      uint pos = vertex.edge(e)->vertex_id();
      // The neighbour id must lie in [M, M+N).
      assert(pos >= M && pos < M+N);
      vertex_data & data = latent_factors_inmem[pos];
      time_svdpp_movie movie(data);
      Map<vec> y(movie.y, D);
      // Adds the total of all D components of y to the scalar accumulator.
      sumY += sum((const vec&)y); //y
    }

    for( int k=0; k<dim; ++k) {
      usr.ptemp[k] = usr.pu[k] + rRuNum * sumY; // pTemp = pu + rRuNum*sumY
    }

    // Per-dimension accumulator of eui * q[k]; from here on the name `sum`
    // refers to this vector, not to the function called above.
    vec sum = zeros(dim);
    for(int e=0; e < vertex.num_edges(); e++) {
      //edge_data & edge = scope.edge_data(oedgeid);
      //float rui = edge.weight;
      float rui = vertex.edge(e)->get_data().weight;
      uint t = (uint)(vertex.edge(e)->get_data().time - 1); // we assume time bins start from 1
      assert(t < M+N+K);
      vertex_data & data = latent_factors_inmem[vertex.edge(e)->vertex_id()];
      time_svdpp_movie mov(data);
      // The time-bin parameters are read from the same array, at index t.
      time_svdpp_time time(latent_factors_inmem[t]);
      double pui = 0;
      time_svdpp_predict(usr, mov, time, rui, pui);
      double eui = rui - pui;

      // Bias terms of the user and of the item.
      *usr.bu += tsp.lrate*(eui - tsp.beta* *usr.bu);
      *mov.bi += tsp.lrate * (eui - tsp.beta* *mov.bi);

      for (int k = 0; k < dim; k++) {
        // Snapshot q[k] before it is modified; the remaining updates of this
        // dimension use the old value.
        double oldValue = mov.q[k];
        double userValue = usr.ptemp[k] + usr.pu[k] * time.pt[k];
        sum[k] += eui * mov.q[k];
        mov.q[k] += tsp.lrate * (eui * userValue - tsp.gamma*mov.q[k]);
        usr.ptemp[k] += tsp.lrate * ( eui * oldValue - tsp.gamma * usr.ptemp[k]);
        usr.p[k] += tsp.lrate * ( eui * oldValue - tsp.gamma*usr.p[k] );
        // pu is updated first; the pt update below already sees the new pu.
        usr.pu[k] += tsp.lrate * (eui * oldValue * time.pt[k] - tsp.gamma * usr.pu[k]);
        time.pt[k] += tsp.lrate * (eui * oldValue * usr.pu[k] - tsp.gamma * time.pt[k]);
        // x and z are updated from snapshots of each other.
        double xOldValue = usr.x[k];
        double zOldValue = time.z[k];
        usr.x[k] += tsp.lrate * (eui * zOldValue - tsp.gamma * xOldValue);
        time.z[k] += tsp.lrate * (eui * xOldValue - tsp.gamma * zOldValue);
      }
      rmse_vec[omp_get_thread_num()] += eui*eui;
    }

    // Second pass over the rated items: adjust their y vectors.
    for(int e=0; e < vertex.num_edges(); e++) {
      time_svdpp_movie mov = latent_factors_inmem[vertex.edge(e)->vertex_id()];
      for(int k=0;k<dim;k++){
        mov.y[k] += tsp.lrate * (rRuNum * sum[k]- tsp.gamma*mov.y[k]);
      }
    }
  }
};
/** * calc distance between two items. * Let a be all the users rated item 1 * Let b be all the users rated item 2 * * 1) Using Jackard index: * Dist_ab = intersection(a,b) / (size(a) + size(b) - size(intersection(a,b)) * * 2) Using AA index: * Dist_ab = sum_user k in intersection(a,b) [ 1 / log(degree(k)) ] * * 3) Using RA index: * Dist_ab = sum_user k in intersection(a,b) [ 1 / degree(k) ] * * 4) Using Asym Cosine: * Dist_ab = intersection(a,b) / size(a)^alpha * size(b)^(1-alpha) */ double calc_distance(graphchi_vertex<uint32_t, uint32_t> &v, vid_t pivot, int distance_metric) { //assert(is_pivot(pivot)); //assert(is_item(pivot) && is_item(v.id())); dense_adj &pivot_edges = adjs[pivot - pivot_st]; int num_edges = v.num_edges(); //if there are not enough neighboring user nodes to those two items there is no need //to actually count the intersection if (num_edges < min_allowed_intersection || pivot_edges.count < min_allowed_intersection) return 0; std::vector<vid_t> edges; edges.resize(num_edges); for(int i=0; i < num_edges; i++) { vid_t other_vertex = v.edge(i)->vertexid; edges[i] = other_vertex; } sort(edges.begin(), edges.end()); std::set<vid_t> intersection; std::set_intersection( pivot_edges.adjlist, pivot_edges.adjlist + pivot_edges.count, edges.begin(), edges.end(), std::inserter(intersection, intersection.begin())); double intersection_size = (double)intersection.size(); //not enough user nodes rated both items, so the pairs of items are not compared. 
if (intersection_size < (double)min_allowed_intersection) return 0; if (distance_metric == JACCARD){ uint set_a_size = v.num_edges(); //number of users connected to current item uint set_b_size = acount(pivot); //number of users connected to current pivot return intersection_size / (double)(set_a_size + set_b_size - intersection_size); //compute the distance } else if (distance_metric == AA){ double dist = 0; for (std::set<vid_t>::iterator i= intersection.begin() ; i != intersection.end(); i++){ vid_t user = *i; assert(latent_factors_inmem.size() == M && is_user(user)); assert(latent_factors_inmem[user].degree > 0); dist += 1.0 / log(latent_factors_inmem[user].degree); } return dist; } else if (distance_metric == RA){ double dist = 0; for (std::set<vid_t>::iterator i= intersection.begin() ; i != intersection.end(); i++){ vid_t user = *i; assert(latent_factors_inmem.size() == M && is_user(user)); assert(latent_factors_inmem[user].degree > 0); dist += 1.0 / latent_factors_inmem[user].degree; } return dist; } else if (distance_metric == ASYM_COSINE){ uint set_a_size = v.num_edges(); //number of users connected to current item uint set_b_size = acount(pivot); //number of users connected to current pivot return intersection_size / (pow(set_a_size,asym_cosine_alpha) * pow(set_b_size,1-asym_cosine_alpha)); } return 0; }
/* * Vertex update function - computes the least square step */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { if (gcontext.iteration == 0){ if (is_user(vertex.id())) { //user node. find the last rated item and store it vertex_data_libfm user = latent_factors_inmem[vertex.id()]; int max_time = 0; for(int e=0; e < vertex.num_outedges(); e++) { const edge_data & edge = vertex.edge(e)->get_data(); if (edge.time >= max_time){ max_time = (int)(edge.time - time_offset); *user.last_item = vertex.edge(e)->vertex_id() - M; } } } if (is_user(vertex.id()) && vertex.num_outedges() == 0) logstream(LOG_WARNING)<<"Vertex: " << vertex.id() << " with no edges: " << std::endl; return; return; } //go over all user nodes if (is_user(vertex.id())){ vertex_data_libfm user = latent_factors_inmem[vertex.id()]; assert(*user.last_item >= 0 && *user.last_item < (int)N); vertex_data & last_item = latent_factors_inmem[M+N+K+(*user.last_item)]; for(int e=0; e < vertex.num_outedges(); e++) { vertex_data_libfm movie(latent_factors_inmem[vertex.edge(e)->vertex_id()]); float rui = vertex.edge(e)->get_data().weight; double pui; vec sum; vertex_data & time = latent_factors_inmem[(int)vertex.edge(e)->get_data().time - time_offset]; float sqErr = libfm_predict(user, movie, time, rui, pui, &sum); float eui = pui - rui; globalMean -= libfm_rate * (eui + reg0 * globalMean); *user.bias -= libfm_rate * (eui + libfm_regw * *user.bias); *movie.bias -= libfm_rate * (eui + libfm_regw * *movie.bias); time.bias -= libfm_rate * (eui + libfm_regw * time.bias); assert(!std::isnan(time.bias)); last_item.bias -= libfm_rate * (eui + libfm_regw * last_item.bias); for(int f = 0; f < D; f++){ // user float grad = sum[f] - user.v[f]; user.v[f] -= libfm_rate * (eui * grad + libfm_regv * user.v[f]); // item grad = sum[f] - movie.v[f]; movie.v[f] -= libfm_rate * (eui * grad + libfm_regv * movie.v[f]); // time grad = sum[f] - time.pvec[f]; time.pvec[f] -= libfm_rate * (eui * 
grad + libfm_regv * time.pvec[f]); // last item grad = sum[f] - last_item.pvec[f]; last_item.pvec[f] -= libfm_rate * (eui * grad + libfm_regv * last_item.pvec[f]); } rmse_vec[omp_get_thread_num()] += sqErr; } } };
/*
 * Move object `old` into the lowest-numbered free slot below it, if one
 * exists, and rewrite every reference to its number.
 *
 * Returns the new object number, or `old` when no slot below `old` is
 * free (in which case nothing is changed apart from the call to
 * db_priv_affected_callable_verb_lookup()).
 *
 * References that are rewritten: the entry in the parent's child/sibling
 * chain, the `parent` field of each child, the entry in the location's
 * contents/next chain, the `location` field of each contained object, the
 * entry in all_users, and every object/verb/property owner field in the
 * database. Panics if the object is missing from its parent's or its
 * location's chain.
 */
Objid db_renumber_object(Objid old) {
    Objid newbie;
    Object *o;

    db_priv_affected_callable_verb_lookup();

    /* Scan upwards from 0 for the first empty slot below `old`. */
    for (newbie = 0; newbie < old; newbie++) {
        if (objects[newbie] == 0) {
            /* Change the identity of the object. */
            o = objects[newbie] = objects[old];
            objects[old] = 0;
            objects[newbie]->id = newbie;

            /* Fix up the parent/children hierarchy */
            {
                Objid oid, *oidp;

                if (o->parent != NOTHING) {
                    /* Walk the parent's child chain via a pointer to each
                     * link so that the matching link can be overwritten in
                     * place. */
                    oidp = &objects[o->parent]->child;
                    while (*oidp != old && *oidp != NOTHING)
                        oidp = &objects[*oidp]->sibling;
                    if (*oidp == NOTHING)
                        panic("Object not in parent's children list");
                    *oidp = newbie;
                }
                /* Every child has to point at the new number. */
                for (oid = o->child; oid != NOTHING; oid = objects[oid]->sibling)
                    objects[oid]->parent = newbie;
            }

            /* Fix up the location/contents hierarchy */
            {
                Objid oid, *oidp;

                if (o->location != NOTHING) {
                    /* Same in-place link replacement, on the location's
                     * contents chain. */
                    oidp = &objects[o->location]->contents;
                    while (*oidp != old && *oidp != NOTHING)
                        oidp = &objects[*oidp]->next;
                    if (*oidp == NOTHING)
                        panic("Object not in location's contents list");
                    *oidp = newbie;
                }
                /* Every contained object has to point at the new number. */
                for (oid = o->contents; oid != NOTHING; oid = objects[oid]->next)
                    objects[oid]->location = newbie;
            }

            /* Fix up the list of users, if necessary */
            if (is_user(newbie)) {
                int i;

                /* Element 0 of the list holds its length; entries start
                 * at index 1. */
                for (i = 1; i <= all_users.v.list[0].v.num; i++)
                    if (all_users.v.list[i].v.obj == old) {
                        all_users.v.list[i].v.obj = newbie;
                        break;
                    }
            }

            /* Fix the owners of verbs, properties and objects */
            {
                Objid oid;

                for (oid = 0; oid < num_objects; oid++) {
                    /* This `o` shadows the outer one for the rest of the
                     * loop body. */
                    Object *o = objects[oid];
                    Verbdef *v;
                    Pval *p;
                    int i, count;

                    if (!o)
                        continue;

                    /* An owner field that already held the number `newbie`
                     * is reset to NOTHING; one that held `old` follows the
                     * object to its new number. The same rule is applied to
                     * verbs and properties below. */
                    if (o->owner == newbie)
                        o->owner = NOTHING;
                    else if (o->owner == old)
                        o->owner = newbie;

                    for (v = o->verbdefs; v; v = v->next)
                        if (v->owner == newbie)
                            v->owner = NOTHING;
                        else if (v->owner == old)
                            v->owner = newbie;

                    count = dbpriv_count_properties(oid);
                    p = o->propval;
                    for (i = 0; i < count; i++)
                        if (p[i].owner == newbie)
                            p[i].owner = NOTHING;
                        else if (p[i].owner == old)
                            p[i].owner = newbie;
                }
            }
            return newbie;
        }
    }

    /* There are no recycled objects less than `old', so keep its number. */
    return old;
}
/**
 * Add weighted ratings for each item linked to `item`.
 *
 * For the pivot user `user_pivot`, every edge of `item` that leads to
 * another item not already connected to the pivot adds
 *   edge_weight * pow(weight, Q)
 * to the pivot's ratings entry for that item, where weight is the larger
 * of the edge's up_weight and down_weight. The ratings update is done
 * while holding the pivot's mutex.
 *
 * Returns 0 on every path visible here.
 *
 * NOTE(review): the text of this function is damaged in this copy -- the
 * span marked "******" near the end replaces part of the original source
 * (at least the end of the debug statement and the closing brace of the
 * loop). The code below is reproduced as found and must be restored from
 * the original before it can compile.
 */
double compute_ratings(graphchi_vertex<uint32_t, edge_data> &item, vid_t user_pivot, float edge_weight) {
  assert(is_pivot(user_pivot));

  // A zero pivot edge weight is an error unless allow_zeros is set, in which
  // case the pair is counted and skipped.
  if (!allow_zeros)
    assert(edge_weight != 0);
  else if (edge_weight == 0) {
    zero_edges++;
    return 0;
  }

  dense_adj &pivot_edges = adjs[user_pivot - pivot_st];

  // The item must itself be connected to the pivot.
  if (!get_val(pivot_edges.edges, item.id())) {
    if (debug)
      Rcpp::Rcerr<<"Skipping item pivot pair since not connected!" << item.id() << std::endl;
    return 0;
  }

  int num_edges = item.num_edges();
  if (debug)
    Rcpp::Rcerr<<"Found " << num_edges << " edges from item : " << item.id() << std::endl;

  //if there are not enough neighboring user nodes to those two items there is no need
  //to actually count the intersection
  if (num_edges < min_allowed_intersection || nnz(pivot_edges.edges) < min_allowed_intersection) {
    if (debug)
      Rcpp::Rcerr<<"skipping item pivot pair since < min_allowed_intersection" << std::endl;
    return 0;
  }

  for(int i=0; i < num_edges; i++) {
    vid_t other_item = item.edge(i)->vertex_id();
    // NOTE(review): if vid_t and M are unsigned this assertion can never
    // fail -- confirm the intended check.
    assert(other_item - M >= 0);
    // `up` is true when the neighbour has the larger vertex id.
    bool up = item.id() < other_item;
    if (debug)
      Rcpp::Rcerr<<"Checking now edge: " << other_item << std::endl;

    // Only item-to-item edges contribute.
    if (is_user(other_item)) {
      if (debug)
        Rcpp::Rcerr<<"skipping edge to user " << other_item << std::endl;
      continue;
    }

    // In directed mode, skip edges whose weight in the relevant direction
    // is zero.
    if (!undirected && ((up && item.edge(i)->get_data().up_weight == 0) || (!up && item.edge(i)->get_data().down_weight == 0))) {
      if (debug)
        Rcpp::Rcerr<<"skipping edge with wrong direction to " << other_item << std::endl;
      continue;
    }

    // Items the pivot is already connected to are not scored.
    if (get_val(pivot_edges.edges, other_item)) {
      if (debug)
        Rcpp::Rcerr<<"skipping edge to " << other_item << " because alrteady connected to pivot" << std::endl;
      continue;
    }

    assert(get_val(pivot_edges.edges, item.id()) != 0);
    float weight = std::max(item.edge(i)->get_data().down_weight, item.edge(i)->get_data().up_weight);
    if (!allow_zeros)
      assert(weight != 0);
    else if (weight == 0)
      continue;

    // Read-modify-write of the pivot's rating entry, under its mutex.
    pivot_edges.mymutex.lock();
    set_val(pivot_edges.ratings, other_item-M, get_val(pivot_edges.ratings, other_item-M) + edge_weight * pow(weight,Q));
    pivot_edges.mymutex.unlock();

    // NOTE(review): the "******" below is the damaged span described in the
    // header comment.
    if (debug)
      Rcpp::Rcerr<<"Adding weight: " << weight << " to item: " << other_item-M+1 << " for user: "******"Finished user pivot " << user_pivot << std::endl;
  return 0;
}
/** * Vertex update function. */ void update(CE_Graph_vertex<VertexDataType, EdgeDataType> &v, CE_Graph_context &gcontext) { if (debug) printf("Entered iteration %d with %d\n", gcontext.iteration, v.id()); //in the zero iteration compute the mean if (gcontext.iteration == 0){ if (is_item(v.id())){ for(int i=0; i<v.num_edges(); i++) { CE_Graph_edge<float> * e = v.edge(i); vid_t user = e->vertexid; mean[user] += e->get_data() / (float)N; } } } //at the first iteration compute the stddev of each item from the mean else if (gcontext.iteration == 1){ if (is_item(v.id())){ dense_adj item_edges; for(int i=0; i < v.num_edges(); i++) set_new(item_edges.edges, v.edge(i)->vertexid, v.edge(i)->get_data()); stddev[v.id() - M] = sum(minus(item_edges.edges, mean).array().pow(2)) / (M-1.0); if (debug) std::cout<<"item: " << v.id() - M+1 << " stddev: " << stddev[v.id() - M] << std::endl; } } /* even iteration numbers: * 1) load a subset of items into memory (pivots) * 2) Find which subset of items needs to compared to the users */ else if (gcontext.iteration % 2 == 0) { if (adjcontainer->is_pivot(v.id()) && is_item(v.id())){ adjcontainer->load_edges_into_memory(v); if (debug) printf("Loading pivot %d intro memory\n", v.id()); } else if (is_user(v.id())){ //check if this user is connected to any pivot item bool has_pivot = false; int pivot = -1; for(int i=0; i<v.num_edges(); i++) { CE_Graph_edge<float> * e = v.edge(i); //assert(is_item(e->vertexid)); if (adjcontainer->is_pivot(e->vertexid) && relevant_items[e->vertexid-M]) { has_pivot = true; pivot = e->vertexid; break; } } if (debug) printf("user %d is linked to pivot %d\n", v.id(), pivot); if (!has_pivot) //this user is not connected to any of the pivot item nodes and thus //it is not relevant at this point return; //this user is connected to a pivot items, thus all connected items should be compared for(int i=0; i<v.num_edges(); i++) { CE_Graph_edge<float> * e = v.edge(i); //assert(v.id() != e->vertexid); 
relevant_items[e->vertexid - M] = true; } }//is_user } //iteration % 2 = 1 /* odd iteration number: * 1) For any item connected to a pivot item * compute itersection */ else { if (!relevant_items[v.id() - M]){ return; } for (vid_t i=adjcontainer->pivot_st; i< adjcontainer->pivot_en; i++){ //since metric is symmetric, compare only to pivots which are smaller than this item id if (i >= v.id() || (!relevant_items[i-M])) continue; double dist = adjcontainer->calc_distance(v, i, distance_metric); item_pairs_compared++; if (item_pairs_compared % 1000000 == 0) logstream(LOG_INFO)<< std::setw(10) << mytimer.current_time() << ") " << std::setw(10) << item_pairs_compared << " pairs compared " << std::endl; if (debug) printf("comparing %d to pivot %d distance is %lg\n", i - M + 1, v.id() - M + 1, dist); if (dist != 0){ fprintf(out_files[omp_get_thread_num()], "%u %u %.12lg\n", v.id()-M+1, i-M+1, (double)dist);//write item similarity to file //where the output format is: //[item A] [ item B ] [ distance ] written_pairs++; } } }//end of iteration % 2 == 1 }//end of update function