float correlation_coeff(const R &user1, const R &user2) { float numer = 0; float denom = 0; float user1r_sq_sum = 0; float user2r_sq_sum = 0; #if defined(ALG_REF_IMPL) //for each product for (int i = 0; i < user1.size(); ++i) { float user1r = user1[i] - mean(user1); float user2r = user2[i] - mean(user2); numer += user1r * user2r; user1r_sq_sum += user1r * user1r; user2r_sq_sum += user2r * user2r; } #else //ALG_ITPP_IMPL R user1r = user1 - mean(user1); R user2r = user2 - mean(user2); //element-wise multiplication of user1r and user2r followed by summation of resultant elements numer = elem_mult_sum(user1r, user2r); //element-wise square of user1r followed by summation of resultant elements user1r_sq_sum = sum_sqr(user1r); //element-wise square of user2r followed by summation of resultant elements user2r_sq_sum = sum_sqr(user2r); #endif denom = std::sqrt(user1r_sq_sum) * std::sqrt(user2r_sq_sum); //denom = std::sqrt(user1r_sq_sum * user2r_sq_sum); //in Recommender Systems Handbook return numer/denom; }
void Worker::cart_product(const Vc& in, Vc& result, const int circ) { /* I have an array of numbers from 0 to some radius saved in "in" * At first I need the cartesian product of these values * for Globals::dimm2 = Globals::dim - 2 instances */ /* for (unsigned int i=0; i<result.size(); i++) { result[i]=0.0; }*/ int idx; //the indices //squares_sum ANNcoord ss; //Start all of the iterators at the beginning for (int i=0; i<Globals::dimm2; i++) { vd1[i].begin=in.begin(); vd1[i].end=in.end(); vd1[i].me=in.begin(); } while(1) { // Increment the rightmost one, and repeat. // When you reach the end, reset that one to the beginning and // increment the next-to-last one. You can get the "next-to-last" // iterator by pulling it out of the neighboring element in your // vector of iterators. idx = 0; for(Vd::iterator it = vd1.begin(); ; ) { // okay, I started at the left instead. sue me ++(it->me); if (it->me == it->end) { //not needed all the time obviously if(it + 1 == vd1.end()) { // I'm the last digit, and I'm about to roll return; } else { // cascade it->me = it->begin; result[idx] = *(it->me); ++it; } } else { // normal result[idx] = *(it->me); break; } ++idx; } //if the current cartesian product is valid for a new sphere of radius circ: ss=sum_sqr(Globals::dimm2, result); if (ss <= radiuses_sqr[circ]) { raster_circle_wrapper(result, circ, ss); //depth_all_reprs(Globals::qbs[id]); } cart_prod_counter++; if (cart_prod_counter >= (int) in.size()) { ov_nodes=1; ov_leaves=0; rec_count(Globals::qbs[id]); // std::cout << "in here"<< std::endl; if (ov_nodes+ov_leaves > pow(2,20)) { //find_all_reprs(Globals::qbs[id], &Globals::hypercube_center, 1.0, kdtree, asdf, id); depth_all_reprs(Globals::qbs[id]); // std::cout << "find_all_reprs" << std::endl; } cart_prod_counter=0; } } }
/** * calc distance between two items. * Let a be all the users rated item 1 * Let b be all the users rated item 2 * * 3) Using Pearson correlation * Dist_12 = (a - mean)*(b- mean)' / (std(a)*std(b)) * * 4) Using cosine similarity: * Dist_12 = (a*b) / sqrt(sum_sqr(a)) * sqrt(sum_sqr(b))) * * 5) Using chebychev: * Dist_12 = max(abs(a-b)) * * 6) Using manhatten distance: * Dist_12 = sum(abs(a-b)) * * 7) Using tanimoto: * Dist_12 = 1.0 - [(a*b) / (sum_sqr(a) + sum_sqr(b) - a*b)] * * 8) Using log likelihood similarity * Dist_12 = 1.0 - 1.0/(1.0 + loglikelihood) * * 9) Using slope one: * Dist_12 = sum_(u in intersection (a,b) (r_u1-ru2 ) / size(intersection(a,b))) */ double calc_distance(CE_Graph_vertex<uint32_t, float> &v, vid_t pivot, int distance_metric) { //assert(is_pivot(pivot)); //assert(is_item(pivot) && is_item(v.id())); dense_adj &pivot_edges = adjs[pivot - pivot_st]; int num_edges = v.num_edges(); //if there are not enough neighboring user nodes to those two items there is no need //to actually count the intersection if (num_edges < min_allowed_intersection || nnz(pivot_edges.edges) < min_allowed_intersection) return 0; dense_adj item_edges; for(int i=0; i < num_edges; i++) set_new(item_edges.edges, v.edge(i)->vertexid, v.edge(i)->get_data()); double intersection_size = item_edges.intersect(pivot_edges); //not enough user nodes rated both items, so the pairs of items are not compared. if (intersection_size < (double)min_allowed_intersection) return 0; if (distance_metric == PEARSON){ if (debug){ std::cout<< pivot -M+1<<" Pivot edges: " <<pivot_edges.edges << std::endl; std::cout<< "Minusmean: " << minus(pivot_edges.edges,mean) << std::endl; std::cout<< v.id() -M+1<<"Item edges: " <<item_edges.edges << std::endl; std::cout<< "Minusmean: " << minus(item_edges.edges, mean) << std::endl; } double dist = minus(pivot_edges.edges, mean).dot(minus(item_edges.edges, mean)); if (debug) std::cout<<"dist " << pivot-M+1 << ":" << v.id()-M+1 << " " << dist << std::endl; return dist / (stddev[pivot-M] * stddev[v.id()-M]); } else if (distance_metric == TANIMOTO){ return calc_tanimoto_distance(pivot_edges.edges, item_edges.edges, sum_sqr(pivot_edges.edges), sum_sqr(item_edges.edges)); } else if (distance_metric == CHEBYCHEV){ return calc_chebychev_distance(pivot_edges.edges, item_edges.edges); } else if (distance_metric == LOG_LIKELIHOOD){ return calc_loglikelihood_distance(pivot_edges.edges, item_edges.edges, sum_sqr(pivot_edges.edges), sum_sqr(item_edges.edges)); } else if (distance_metric == COSINE){ return calc_cosine_distance(pivot_edges.edges, item_edges.edges, sum_sqr(pivot_edges.edges), sum_sqr(item_edges.edges)); } else if (distance_metric ==MANHATTEN){ return calc_manhatten_distance(pivot_edges.edges, item_edges.edges); } else if (distance_metric == SLOPE_ONE){ return calc_slope_one_distance(pivot_edges.edges, item_edges.edges) / intersection_size; } return NAN; }