/**
 * Find, for each queried user, the most similar users by running a nearest
 * neighbor search in a transformed latent space.
 *
 * The full rating matrix X = W * H is never formed.  Since
 * d(X.col(i), X.col(j)) = d(W * H.col(i), W * H.col(j)), the search over X is
 * equivalent to a search over the columns of H under the Mahalanobis distance
 * with M^{-1} = W^T W.  Factoring M^{-1} = L L^T (Cholesky) and replacing H by
 * L^T * H turns this into an ordinary nearest neighbor search.
 *
 * @param users Column indices (into H) of the users to query.
 * @param numUsersForSimilarity Number of neighbors requested from the search.
 * @param neighborhood Output argument handed to the search policy's Search().
 * @param similarities Output argument handed to the search policy's Search().
 */
void GetNeighborhood(const arma::Col<size_t>& users,
                     const size_t numUsersForSimilarity,
                     arma::Mat<size_t>& neighborhood,
                     arma::mat& similarities) const
{
  // arma::chol() hands back the upper-triangular factor, i.e. L^T rather than
  // L, so it can be applied to H directly.
  const arma::mat upperFactor = arma::chol(w.t() * w);
  arma::mat transformedH = upperFactor * h;

  // Gather the transformed feature vector of every queried user; column q of
  // the query set corresponds to users(q).
  const size_t numQueries = users.n_elem;
  arma::mat queryPoints(transformedH.n_rows, numQueries);
  for (size_t q = 0; q < numQueries; ++q)
  {
    queryPoints.col(q) = transformedH.col(users(q));
  }

  // The reference set is every user; the query set is only the selected ones.
  NeighborSearchPolicy searcher(transformedH);
  searcher.Search(queryPoints,
                  numUsersForSimilarity,
                  neighborhood,
                  similarities);
}
/**
 * Estimate a radius for the given dataset from nearest neighbor distances.
 *
 * For every point, the nNeighbors = floor(data.n_cols * ratio) nearest points
 * are found; the largest of those distances is taken per point, and the
 * returned radius is the average of these per-point maxima.
 *
 * @param data Dataset to estimate the radius for (one point per column is
 *     assumed, matching the use of data.n_cols as the point count).
 * @param ratio Fraction of the number of points to use as the neighbor count.
 * @return Sum of the per-point maximum neighbor distances divided by
 *     data.n_cols.
 */
double MeanShift<UseKernel, KernelType, MatType>::
EstimateRadius(const MatType& data, double ratio)
{
  // Number of neighbors to look up for each point (truncated toward zero).
  const size_t nNeighbors = static_cast<size_t>(data.n_cols * ratio);

  // Search the dataset against itself.
  neighbor::KNN knn(data);
  arma::Mat<size_t> neighborIndices;
  arma::mat neighborDistances;
  knn.Search(nNeighbors, neighborIndices, neighborDistances);

  // Column-wise maximum: the farthest of the selected neighbors per point.
  arma::rowvec farthest = max(neighborDistances);

  // The radius is the mean of those maxima over all points.
  const double numPoints = static_cast<double>(data.n_cols);
  return sum(farthest) / numPoints;
}