Esempio n. 1
0
// Pearson correlation coefficient between two equally indexed value vectors.
//
// Computes
//     sum_i (user1[i] - mean(user1)) * (user2[i] - mean(user2))
//     ---------------------------------------------------------
//     sqrt(sum_i (user1[i] - mean(user1))^2) * sqrt(sum_i (user2[i] - mean(user2))^2)
//
// Two implementations are selectable at compile time:
//   - ALG_REF_IMPL: explicit element-by-element loop over user1.size() entries.
//   - otherwise:    vectorised form built on mean / elem_mult_sum / sum_sqr.
//
// user1, user2: the two vectors to correlate; user2 is indexed with the same
//               indices as user1, so it must hold at least user1.size() entries.
// Returns:      numer / denom as a float. There is no guard against a zero
//               denominator (e.g. when every element of one vector equals its
//               mean), so the result of that division is returned as-is.
float correlation_coeff(const R &user1, const R &user2)
{
	float numer = 0;
	float denom = 0;

	float user1r_sq_sum = 0;
	float user2r_sq_sum = 0;

#if defined(ALG_REF_IMPL)
	// The means and the element count do not change while iterating, so they
	// are computed once up front instead of once per element (which made the
	// loop quadratic in the vector length).
	// NOTE(review): assumes mean() yields a double, as the IT++ vector mean does - confirm for R.
	const double user1_mean = mean(user1);
	const double user2_mean = mean(user2);
	const int count = user1.size();

	//for each product
	for (int i = 0; i < count; ++i)
	{
		// deviation of each rating from its vector's mean
		const float user1r = user1[i] - user1_mean;
		const float user2r = user2[i] - user2_mean;
		numer += user1r * user2r;

		user1r_sq_sum += user1r * user1r;
		user2r_sq_sum += user2r * user2r;
	}
#else	//ALG_ITPP_IMPL
	R user1r = user1 - mean(user1);
	R user2r = user2 - mean(user2);
	//element-wise multiplication of user1r and user2r followed by summation of resultant elements
	numer = elem_mult_sum(user1r, user2r);
	//element-wise square of user1r followed by summation of resultant elements
	user1r_sq_sum = sum_sqr(user1r);
	//element-wise square of user2r followed by summation of resultant elements
	user2r_sq_sum = sum_sqr(user2r);
#endif
	// Product of the two square roots; the single-sqrt form
	// std::sqrt(user1r_sq_sum * user2r_sq_sum) is the one given in the
	// Recommender Systems Handbook.
	denom = std::sqrt(user1r_sq_sum) * std::sqrt(user2r_sq_sum);

	return numer/denom;
}
Esempio n. 2
0
void Worker::cart_product(const Vc& in, Vc& result, const int circ) {
  /* I have an array of numbers from 0 to some radius saved in "in"
   * At first I need the cartesian product of these values
   * for Globals::dimm2 = Globals::dim - 2 instances
  */

/*  for (unsigned int i=0; i<result.size(); i++) {
    result[i]=0.0;
  }*/
  int idx;
  //the indices
  //squares_sum
  ANNcoord ss;

  //Start all of the iterators at the beginning
  for (int i=0; i<Globals::dimm2; i++) {
    vd1[i].begin=in.begin();
    vd1[i].end=in.end();
    vd1[i].me=in.begin();
  }
  while(1) {
    // Increment the rightmost one, and repeat.
    // When you reach the end, reset that one to the beginning and
    // increment the next-to-last one. You can get the "next-to-last"
    // iterator by pulling it out of the neighboring element in your
    // vector of iterators.
    idx = 0;
    for(Vd::iterator it = vd1.begin(); ; ) {
      // okay, I started at the left instead. sue me
      ++(it->me);
      if (it->me == it->end) {
        //not needed all the time obviously
        if(it + 1 == vd1.end()) {
          // I'm the last digit, and I'm about to roll
          return;
        } else {
          // cascade
          it->me = it->begin;
          result[idx] = *(it->me);
          ++it;
        }
      } else {
        // normal
        result[idx] = *(it->me);
        break;
      }
      ++idx;
    }

    //if the current cartesian product is valid for a new sphere of radius circ:
    ss=sum_sqr(Globals::dimm2, result);
    if (ss <= radiuses_sqr[circ]) {
      raster_circle_wrapper(result, circ, ss);
      //depth_all_reprs(Globals::qbs[id]);
    }

    
    cart_prod_counter++;

    if (cart_prod_counter >= (int) in.size()) {
      ov_nodes=1;
      ov_leaves=0;
      rec_count(Globals::qbs[id]);
//      std::cout << "in here"<< std::endl;
      if (ov_nodes+ov_leaves > pow(2,20)) {
        //find_all_reprs(Globals::qbs[id], &Globals::hypercube_center, 1.0, kdtree, asdf, id);
        depth_all_reprs(Globals::qbs[id]);
    //    std::cout << "find_all_reprs" << std::endl;
      }
      cart_prod_counter=0;
    }
  }
}
Esempio n. 3
0
  /** 
   * Calculate the distance between two items.
   * Let a be all the users who rated item 1.
   * Let b be all the users who rated item 2.
   *
   * 3) Using Pearson correlation
   *      Dist_12 = (a - mean)*(b- mean)' / (std(a)*std(b))
   *
   * 4) Using cosine similarity:
   *      Dist_12 = (a*b) / (sqrt(sum_sqr(a)) * sqrt(sum_sqr(b)))
   *
   * 5) Using Chebyshev distance:
   *      Dist_12 = max(abs(a-b))
   *
   * 6) Using Manhattan distance:
   *      Dist_12 = sum(abs(a-b))
   *
   * 7) Using tanimoto:
   *      Dist_12 = 1.0 - [(a*b) / (sum_sqr(a) + sum_sqr(b) - a*b)]
   *
   * 8) Using log likelihood similarity
   *      Dist_12 = 1.0 - 1.0/(1.0 + loglikelihood)
   *
   * 9) Using slope one:
   *      Dist_12 = sum_(u in intersection(a,b)) (r_u1 - r_u2) / size(intersection(a,b))
   */
  double calc_distance(CE_Graph_vertex<uint32_t, float> &v, vid_t pivot, int distance_metric) {
    // Computes the distance between the item vertex `v` and the pivot item
    // `pivot` under the metric selected by `distance_metric`.
    //
    // Returns 0 when either item has fewer than min_allowed_intersection
    // entries, or when the value reported by intersect() is below that
    // threshold; returns NAN when `distance_metric` matches none of the
    // metrics handled below.

    //assert(is_pivot(pivot));
    //assert(is_item(pivot) && is_item(v.id()));
    dense_adj &pivot_adj = adjs[pivot - pivot_st];
    const int degree = v.num_edges();

    // Too few neighbours on either side: the intersection cannot reach the
    // threshold, so skip building it.
    if (degree < min_allowed_intersection)
      return 0;
    if (nnz(pivot_adj.edges) < min_allowed_intersection)
      return 0;

    // Collect v's edges (neighbour vertex id -> edge value) into a dense_adj.
    dense_adj item_adj;
    for (int e = 0; e < degree; ++e)
      set_new(item_adj.edges, v.edge(e)->vertexid, v.edge(e)->get_data());

    double intersection_size = item_adj.intersect(pivot_adj);

    // Not enough common neighbours: this pair of items is not compared.
    if (intersection_size < static_cast<double>(min_allowed_intersection))
      return 0;

    if (distance_metric == PEARSON) {
      if (debug) {
        std::cout << pivot - M + 1 << " Pivot edges: " << pivot_adj.edges << std::endl;
        std::cout << "Minusmean:   " << minus(pivot_adj.edges, mean) << std::endl;
        std::cout << v.id() - M + 1 << "Item edges:  " << item_adj.edges << std::endl;
        std::cout << "Minusmean:   " << minus(item_adj.edges, mean) << std::endl;
      }

      // Dot product of the two mean-subtracted vectors ...
      double dist = minus(pivot_adj.edges, mean).dot(minus(item_adj.edges, mean));
      if (debug)
        std::cout << "dist " << pivot - M + 1 << ":" << v.id() - M + 1 << " " << dist << std::endl;

      // ... scaled by the product of the two stddev entries.
      return dist / (stddev[pivot - M] * stddev[v.id() - M]);
    }

    if (distance_metric == TANIMOTO) {
      return calc_tanimoto_distance(pivot_adj.edges,
                                    item_adj.edges,
                                    sum_sqr(pivot_adj.edges),
                                    sum_sqr(item_adj.edges));
    }

    if (distance_metric == CHEBYCHEV) {
      return calc_chebychev_distance(pivot_adj.edges, item_adj.edges);
    }

    if (distance_metric == LOG_LIKELIHOOD) {
      return calc_loglikelihood_distance(pivot_adj.edges,
                                         item_adj.edges,
                                         sum_sqr(pivot_adj.edges),
                                         sum_sqr(item_adj.edges));
    }

    if (distance_metric == COSINE) {
      return calc_cosine_distance(pivot_adj.edges,
                                  item_adj.edges,
                                  sum_sqr(pivot_adj.edges),
                                  sum_sqr(item_adj.edges));
    }

    if (distance_metric == MANHATTEN) {
      return calc_manhatten_distance(pivot_adj.edges, item_adj.edges);
    }

    if (distance_metric == SLOPE_ONE) {
      // The helper's result is averaged over the intersection size.
      return calc_slope_one_distance(pivot_adj.edges, item_adj.edges) / intersection_size;
    }

    // Unknown metric.
    return NAN;
  }