// Finds, for every column of querySet, the best k neighbor candidates among
// the reference indices returned by the hash tables.
//
// k                  - number of result rows stored per query point.
// resultingNeighbors - resized to k x querySet.n_cols; every entry starts as
//                      referenceSet.n_cols and is then updated by BaseCase().
// distances          - resized to k x querySet.n_cols; every entry starts as
//                      SortPolicy::WorstDistance() and is then updated by
//                      BaseCase().
// numTablesToSearch  - forwarded unchanged to ReturnIndicesFromTable().
//
// The total number of candidates examined is added to distanceEvaluations,
// and the per-query average is written to Log::Info.
void LSHSearch<SortPolicy>::Search(const size_t k,
                                   arma::Mat<size_t>& resultingNeighbors,
                                   arma::mat& distances,
                                   const size_t numTablesToSearch)
{
  // Set the size of the neighbor and distance matrices.
  resultingNeighbors.set_size(k, querySet.n_cols);
  distances.set_size(k, querySet.n_cols);
  distances.fill(SortPolicy::WorstDistance());
  resultingNeighbors.fill(referenceSet.n_cols);

  // Running total of candidates returned over all query points.
  size_t totalIndicesReturned = 0;

  Timer::Start("computing_neighbors");

  // Go through every query point sequentially.
  for (size_t i = 0; i < querySet.n_cols; i++)
  {
    // Hash the query into the tables to obtain the neighbor candidates.
    arma::uvec refIndices;
    ReturnIndicesFromTable(i, refIndices, numTablesToSearch);

    // Book-keeping for the number of neighbor candidates returned.
    totalIndicesReturned += refIndices.n_elem;

    // Sequentially go through all the candidates and save the best 'k'.
    for (size_t j = 0; j < refIndices.n_elem; j++)
      BaseCase(distances, resultingNeighbors, i,
          static_cast<size_t>(refIndices[j]));
  }

  Timer::Stop("computing_neighbors");

  distanceEvaluations += totalIndicesReturned;

  // With an empty query set the loop above never runs and the total is zero;
  // skip the division so we do not divide an integer by zero.
  size_t avgIndicesReturned = 0;
  if (querySet.n_cols > 0)
    avgIndicesReturned = totalIndicesReturned / querySet.n_cols;

  Log::Info << avgIndicesReturned << " distinct indices returned on average."
      << std::endl;
}
// Evaluates the integral at the given points, splitting the points into
// fixed-size chunks that are dispatched through tbb::parallel_for.
//
// region - selects whether the near-field or the far-field set of stored
//          trial data (geometry, transformed values, weights) is used.
// points - evaluation points; the number of points is points.n_cols.
// result - resized to m_integral->resultDimension() x points.n_cols and
//          zero-filled before the chunk loop runs.
void DefaultEvaluatorForIntegralOperators<BasisFunctionType, KernelType,
        ResultType, GeometryFactory>::evaluate(
        Region region,
        const arma::Mat<CoordinateType>& points,
        arma::Mat<ResultType>& result) const
{
    const size_t pointCount = points.n_cols;
    const int outputComponentCount = m_integral->resultDimension();

    result.set_size(outputComponentCount, pointCount);
    result.fill(0.);

    // Decide once which set of stored trial data applies.
    const bool useNearField =
        (region == EvaluatorForIntegralOperators<ResultType>::NEAR_FIELD);

    const GeometricalData<CoordinateType>& trialGeomData =
        useNearField ? m_nearFieldTrialGeomData : m_farFieldTrialGeomData;
    const CollectionOf2dArrays<ResultType>& trialTransfValues =
        useNearField ? m_nearFieldTrialTransfValues
                     : m_farFieldTrialTransfValues;
    const std::vector<CoordinateType>& weights =
        useNearField ? m_nearFieldWeights : m_farFieldWeights;

    // Work on 96 points at a time so that the arrays of kernel values
    // do not grow too large.
    const size_t pointsPerChunk = 96;
    const size_t numChunks = (pointCount + pointsPerChunk - 1) / pointsPerChunk;

    // A single thread is used when OpenCL is enabled; otherwise the thread
    // count comes from the parallelization options (AUTO maps to TBB's
    // automatic setting).
    int maxThreadCount = 1;
    if (!m_parallelizationOptions.isOpenClEnabled()) {
        if (m_parallelizationOptions.maxThreadCount() !=
                ParallelizationOptions::AUTO)
            maxThreadCount = m_parallelizationOptions.maxThreadCount();
        else
            maxThreadCount = tbb::task_scheduler_init::automatic;
    }
    tbb::task_scheduler_init scheduler(maxThreadCount);

    typedef EvaluationLoopBody<BasisFunctionType, KernelType, ResultType>
            ChunkBody;
    {
        // Scoped object: lives only for the duration of the parallel loop.
        Fiber::SerialBlasRegion serialBlas;
        const tbb::blocked_range<size_t> chunkRange(0, numChunks);
        tbb::parallel_for(chunkRange,
                          ChunkBody(pointsPerChunk, points, trialGeomData,
                                    trialTransfValues, weights,
                                    *m_kernels, *m_integral, result));
    }

    // Old serial version, retained for reference:
    //
    // CollectionOf4dArrays<KernelType> kernelValues;
    // GeometricalData<CoordinateType> evalPointGeomData;
    // for (size_t start = 0; start < pointCount; start += chunkSize)
    // {
    //     size_t end = std::min(start + chunkSize, pointCount);
    //     evalPointGeomData.globals =
    //         points.cols(start, end - 1 /* inclusive */);
    //     m_kernels->evaluateOnGrid(evalPointGeomData, trialGeomData,
    //                               kernelValues);
    //     // View into the current chunk of the "result" array
    //     _2dArray<ResultType> resultChunk(outputComponentCount, end - start,
    //                                      result.colptr(start));
    //     m_integral->evaluate(trialGeomData,
    //                          kernelValues,
    //                          weightedTrialTransfValues,
    //                          resultChunk);
    // }
}
// Computes up to numRecs item recommendations for each queried user.
//
// The product w * h is stored in `rating`.  Each queried user's column of
// `rating` is used as a query for a nearest-neighbor search over all columns;
// the neighbors' rating columns are averaged, and the items with the largest
// averages for which cleanedData holds a zero entry for that user are kept.
//
// numRecs         - number of recommendations to produce per user.  When it
//                   is zero, `recommendations` is sized to 0 x users.n_elem
//                   and nothing else is produced.
// recommendations - resized to numRecs x users.n_elem.  Slots that are never
//                   filled keep the value cleanedData.n_rows, which is not a
//                   valid item index.
// users           - column indices of the users to generate recommendations
//                   for.
void CF<FactorizerType>::GetRecommendations(const size_t numRecs,
                                            arma::Mat<size_t>& recommendations,
                                            arma::Col<size_t>& users)
{
  // Generate new table by multiplying approximate values.
  rating = w * h;

  // Now, we will use the decomposed w and h matrices to estimate what the user
  // would have rated items as, and then pick the best items.

  // Temporarily store feature vector of queried users.
  arma::mat query(rating.n_rows, users.n_elem);

  // Select feature vectors of queried users.
  for (size_t i = 0; i < users.n_elem; i++)
    query.col(i) = rating.col(users(i));

  // Temporary storage for neighborhood of the queried users.
  arma::Mat<size_t> neighborhood;

  // Calculate the neighborhood of the queried users.
  // This should be a templatized option.
  neighbor::AllkNN a(rating);
  arma::mat resultingDistances; // Temporary storage.
  a.Search(query, numUsersForSimilarity, neighborhood, resultingDistances);

  // Temporary storage for storing the average rating for each user in their
  // neighborhood.
  arma::mat averages = arma::zeros<arma::mat>(rating.n_rows, query.n_cols);

  // Iterate over each query user.
  for (size_t i = 0; i < neighborhood.n_cols; ++i)
  {
    // Iterate over each neighbor of the query user.
    for (size_t j = 0; j < neighborhood.n_rows; ++j)
      averages.col(i) += rating.col(neighborhood(j, i));

    // Normalize average.
    averages.col(i) /= neighborhood.n_rows;
  }

  // Sentinel marking "no recommendation in this slot".  The same value must
  // be used both when filling and when checking for unfilled slots below.
  const size_t invalidItem = cleanedData.n_rows;

  // Generate recommendations for each query user by finding the maximum
  // numRecs elements in the averages matrix.
  recommendations.set_size(numRecs, users.n_elem);
  recommendations.fill(invalidItem);

  // With no recommendations requested there is no "worst candidate" row;
  // values.n_rows - 1 would wrap around, so stop here.
  if (numRecs == 0)
    return;

  arma::mat values(numRecs, users.n_elem);
  values.fill(-DBL_MAX); // The smallest possible value.

  const size_t lastRow = values.n_rows - 1;

  for (size_t i = 0; i < users.n_elem; i++)
  {
    // Look through the averages column corresponding to the current user.
    for (size_t j = 0; j < averages.n_rows; ++j)
    {
      // Ensure that the user hasn't already rated the item.
      if (cleanedData(j, users(i)) != 0.0)
        continue; // The user already rated the item.

      // Is the estimated value better than the worst candidate?
      const double value = averages(j, i);
      if (value > values(lastRow, i))
      {
        // It should be inserted.  Which position?  Walk upwards while the new
        // value is larger than the entry above it.
        size_t insertPosition = lastRow;
        while (insertPosition > 0)
        {
          if (value <= values(insertPosition - 1, i))
            break; // The current value is the right one.
          insertPosition--;
        }

        // Now insert it into the list.
        InsertNeighbor(i, insertPosition, j, value, recommendations, values);
      }
    }

    // If we were not able to come up with enough recommendations, issue a
    // warning.  The last slot still holding the sentinel means it was never
    // filled.
    if (recommendations(lastRow, i) == invalidItem)
      Log::Warn << "Could not provide " << values.n_rows << " recommendations "
          << "for user " << users(i) << " (not enough un-rated items)!"
          << std::endl;
  }
}
void CF::GetRecommendations(const size_t numRecs, arma::Mat<size_t>& recommendations, arma::Col<size_t>& users) { // We want to avoid calculating the full rating matrix, so we will do nearest // neighbor search only on the H matrix, using the observation that if the // rating matrix X = W*H, then d(X.col(i), X.col(j)) = d(W H.col(i), W // H.col(j)). This can be seen as nearest neighbor search on the H matrix // with the Mahalanobis distance where M^{-1} = W^T W. So, we'll decompose // M^{-1} = L L^T (the Cholesky decomposition), and then multiply H by L^T. // Then we can perform nearest neighbor search. arma::mat l = arma::chol(w.t() * w); arma::mat stretchedH = l * h; // Due to the Armadillo API, l is L^T. // Now, we will use the decomposed w and h matrices to estimate what the user // would have rated items as, and then pick the best items. // Temporarily store feature vector of queried users. arma::mat query(stretchedH.n_rows, users.n_elem); // Select feature vectors of queried users. for (size_t i = 0; i < users.n_elem; i++) query.col(i) = stretchedH.col(users(i)); // Temporary storage for neighborhood of the queried users. arma::Mat<size_t> neighborhood; // Calculate the neighborhood of the queried users. // This should be a templatized option. neighbor::KNN a(stretchedH); arma::mat resultingDistances; // Temporary storage. a.Search(query, numUsersForSimilarity, neighborhood, resultingDistances); // Generate recommendations for each query user by finding the maximum numRecs // elements in the averages matrix. recommendations.set_size(numRecs, users.n_elem); recommendations.fill(cleanedData.n_rows); // Invalid item number. arma::mat values(numRecs, users.n_elem); values.fill(-DBL_MAX); // The smallest possible value. for (size_t i = 0; i < users.n_elem; i++) { // First, calculate average of neighborhood values. 
arma::vec averages; averages.zeros(cleanedData.n_rows); for (size_t j = 0; j < neighborhood.n_rows; ++j) averages += w * h.col(neighborhood(j, i)); averages /= neighborhood.n_rows; // Look through the averages column corresponding to the current user. for (size_t j = 0; j < averages.n_rows; ++j) { // Ensure that the user hasn't already rated the item. if (cleanedData(j, users(i)) != 0.0) continue; // The user already rated the item. // Is the estimated value better than the worst candidate? const double value = averages[j]; if (value > values(values.n_rows - 1, i)) { // It should be inserted. Which position? size_t insertPosition = values.n_rows - 1; while (insertPosition > 0) { if (value <= values(insertPosition - 1, i)) break; // The current value is the right one. insertPosition--; } // Now insert it into the list. InsertNeighbor(i, insertPosition, j, value, recommendations, values); } } // If we were not able to come up with enough recommendations, issue a // warning. if (recommendations(values.n_rows - 1, i) == cleanedData.n_rows + 1) Log::Warn << "Could not provide " << values.n_rows << " recommendations " << "for user " << users(i) << " (not enough un-rated items)!" << std::endl; } }