Esempio n. 1
0
void LSHSearch<SortPolicy>::
Search(const size_t k,
       arma::Mat<size_t>& resultingNeighbors,
       arma::mat& distances,
       const size_t numTablesToSearch)
{
  // Set the size of the neighbor and distance matrices.
  resultingNeighbors.set_size(k, querySet.n_cols);
  distances.set_size(k, querySet.n_cols);
  distances.fill(SortPolicy::WorstDistance());
  resultingNeighbors.fill(referenceSet.n_cols);

  size_t avgIndicesReturned = 0;

  Timer::Start("computing_neighbors");

  // Go through every query point sequentially.
  for (size_t i = 0; i < querySet.n_cols; i++)
  {
    // Hash every query into every hash table and eventually into the
    // 'secondHashTable' to obtain the neighbor candidates.
    arma::uvec refIndices;
    ReturnIndicesFromTable(i, refIndices, numTablesToSearch);

    // An informative book-keeping for the number of neighbor candidates
    // returned on average.
    avgIndicesReturned += refIndices.n_elem;

    // Sequentially go through all the candidates and save the best 'k'
    // candidates.
    for (size_t j = 0; j < refIndices.n_elem; j++)
      BaseCase(distances, resultingNeighbors, i, (size_t) refIndices[j]);
  }

  Timer::Stop("computing_neighbors");

  distanceEvaluations += avgIndicesReturned;
  avgIndicesReturned /= querySet.n_cols;
  Log::Info << avgIndicesReturned << " distinct indices returned on average." <<
      std::endl;
}
void DefaultEvaluatorForIntegralOperators<BasisFunctionType, KernelType,
ResultType, GeometryFactory>::evaluate(
        Region region,
        const arma::Mat<CoordinateType>& points, arma::Mat<ResultType>& result) const
{
    const size_t pointCount = points.n_cols;
    const int outputComponentCount = m_integral->resultDimension();

    result.set_size(outputComponentCount, pointCount);
    result.fill(0.);

    const GeometricalData<CoordinateType>& trialGeomData =
            (region == EvaluatorForIntegralOperators<ResultType>::NEAR_FIELD) ?
                m_nearFieldTrialGeomData :
                m_farFieldTrialGeomData;
    const CollectionOf2dArrays<ResultType>& trialTransfValues =
            (region == EvaluatorForIntegralOperators<ResultType>::NEAR_FIELD) ?
                m_nearFieldTrialTransfValues :
                m_farFieldTrialTransfValues;
    const std::vector<CoordinateType>& weights =
            (region == EvaluatorForIntegralOperators<ResultType>::NEAR_FIELD) ?
                m_nearFieldWeights :
                m_farFieldWeights;

    // Do things in chunks of 96 points -- in order to avoid creating
    // too large arrays of kernel values
    const size_t chunkSize = 96;
    const size_t chunkCount = (pointCount + chunkSize - 1) / chunkSize;

    int maxThreadCount = 1;
    if (!m_parallelizationOptions.isOpenClEnabled()) {
        if (m_parallelizationOptions.maxThreadCount() ==
                ParallelizationOptions::AUTO)
            maxThreadCount = tbb::task_scheduler_init::automatic;
        else
            maxThreadCount = m_parallelizationOptions.maxThreadCount();
    }
    tbb::task_scheduler_init scheduler(maxThreadCount);
    typedef EvaluationLoopBody<
            BasisFunctionType, KernelType, ResultType> Body;
    {
        Fiber::SerialBlasRegion region;
        tbb::parallel_for(tbb::blocked_range<size_t>(0, chunkCount),
                          Body(chunkSize,
                               points, trialGeomData, trialTransfValues, weights,
                               *m_kernels, *m_integral, result));
    }

//    // Old serial version
//    CollectionOf4dArrays<KernelType> kernelValues;
//    GeometricalData<CoordinateType> evalPointGeomData;
//    for (size_t start = 0; start < pointCount; start += chunkSize)
//    {
//        size_t end = std::min(start + chunkSize, pointCount);
//        evalPointGeomData.globals = points.cols(start, end - 1 /* inclusive */);
//        m_kernels->evaluateOnGrid(evalPointGeomData, trialGeomData, kernelValues);
//        // View into the current chunk of the "result" array
//        _2dArray<ResultType> resultChunk(outputComponentCount, end - start,
//                                         result.colptr(start));
//        m_integral->evaluate(trialGeomData,
//                             kernelValues,
//                             weightedTrialTransfValues,
//                             resultChunk);
//    }
}
Esempio n. 3
0
void CF<FactorizerType>::GetRecommendations(const size_t numRecs,
                                            arma::Mat<size_t>& recommendations,
                                            arma::Col<size_t>& users)
{
  // Generate new table by multiplying approximate values.
  rating = w * h;

  // Now, we will use the decomposed w and h matrices to estimate what the user
  // would have rated items as, and then pick the best items.

  // Temporarily store feature vector of queried users.
  arma::mat query(rating.n_rows, users.n_elem);

  // Select feature vectors of queried users.
  for (size_t i = 0; i < users.n_elem; i++)
    query.col(i) = rating.col(users(i));

  // Temporary storage for neighborhood of the queried users.
  arma::Mat<size_t> neighborhood;

  // Calculate the neighborhood of the queried users.
  // This should be a templatized option.
  neighbor::AllkNN a(rating);
  arma::mat resultingDistances; // Temporary storage.
  a.Search(query, numUsersForSimilarity, neighborhood, resultingDistances);

  // Temporary storage for storing the average rating for each user in their
  // neighborhood.
  arma::mat averages = arma::zeros<arma::mat>(rating.n_rows, query.n_cols);

  // Iterate over each query user.
  for (size_t i = 0; i < neighborhood.n_cols; ++i)
  {
    // Iterate over each neighbor of the query user.
    for (size_t j = 0; j < neighborhood.n_rows; ++j)
      averages.col(i) += rating.col(neighborhood(j, i));
    // Normalize average.
    averages.col(i) /= neighborhood.n_rows;
  }

  // Generate recommendations for each query user by finding the maximum numRecs
  // elements in the averages matrix.
  recommendations.set_size(numRecs, users.n_elem);
  recommendations.fill(cleanedData.n_rows); // Invalid item number.
  arma::mat values(numRecs, users.n_elem);
  values.fill(-DBL_MAX); // The smallest possible value.
  for (size_t i = 0; i < users.n_elem; i++)
  {
    // Look through the averages column corresponding to the current user.
    for (size_t j = 0; j < averages.n_rows; ++j)
    {
      // Ensure that the user hasn't already rated the item.
      if (cleanedData(j, users(i)) != 0.0)
        continue; // The user already rated the item.

      // Is the estimated value better than the worst candidate?
      const double value = averages(j, i);
      if (value > values(values.n_rows - 1, i))
      {
        // It should be inserted.  Which position?
        size_t insertPosition = values.n_rows - 1;
        while (insertPosition > 0)
        {
          if (value <= values(insertPosition - 1, i))
            break; // The current value is the right one.
          insertPosition--;
        }

        // Now insert it into the list.
        InsertNeighbor(i, insertPosition, j, value, recommendations,
            values);
      }
    }

    // If we were not able to come up with enough recommendations, issue a
    // warning.
    if (recommendations(values.n_rows - 1, i) == cleanedData.n_rows + 1)
      Log::Warn << "Could not provide " << values.n_rows << " recommendations "
          << "for user " << users(i) << " (not enough un-rated items)!"
          << std::endl;
  }
}
Esempio n. 4
0
void CF::GetRecommendations(const size_t numRecs,
                            arma::Mat<size_t>& recommendations,
                            arma::Col<size_t>& users)
{
  // We want to avoid calculating the full rating matrix, so we will do nearest
  // neighbor search only on the H matrix, using the observation that if the
  // rating matrix X = W*H, then d(X.col(i), X.col(j)) = d(W H.col(i), W
  // H.col(j)).  This can be seen as nearest neighbor search on the H matrix
  // with the Mahalanobis distance where M^{-1} = W^T W.  So, we'll decompose
  // M^{-1} = L L^T (the Cholesky decomposition), and then multiply H by L^T.
  // Then we can perform nearest neighbor search.
  arma::mat l = arma::chol(w.t() * w);
  arma::mat stretchedH = l * h; // Due to the Armadillo API, l is L^T.

  // Now, we will use the decomposed w and h matrices to estimate what the user
  // would have rated items as, and then pick the best items.

  // Temporarily store feature vector of queried users.
  arma::mat query(stretchedH.n_rows, users.n_elem);

  // Select feature vectors of queried users.
  for (size_t i = 0; i < users.n_elem; i++)
    query.col(i) = stretchedH.col(users(i));

  // Temporary storage for neighborhood of the queried users.
  arma::Mat<size_t> neighborhood;

  // Calculate the neighborhood of the queried users.
  // This should be a templatized option.
  neighbor::KNN a(stretchedH);
  arma::mat resultingDistances; // Temporary storage.
  a.Search(query, numUsersForSimilarity, neighborhood, resultingDistances);

  // Generate recommendations for each query user by finding the maximum numRecs
  // elements in the averages matrix.
  recommendations.set_size(numRecs, users.n_elem);
  recommendations.fill(cleanedData.n_rows); // Invalid item number.
  arma::mat values(numRecs, users.n_elem);
  values.fill(-DBL_MAX); // The smallest possible value.
  for (size_t i = 0; i < users.n_elem; i++)
  {
    // First, calculate average of neighborhood values.
    arma::vec averages;
    averages.zeros(cleanedData.n_rows);

    for (size_t j = 0; j < neighborhood.n_rows; ++j)
      averages += w * h.col(neighborhood(j, i));
    averages /= neighborhood.n_rows;

    // Look through the averages column corresponding to the current user.
    for (size_t j = 0; j < averages.n_rows; ++j)
    {
      // Ensure that the user hasn't already rated the item.
      if (cleanedData(j, users(i)) != 0.0)
        continue; // The user already rated the item.

      // Is the estimated value better than the worst candidate?
      const double value = averages[j];
      if (value > values(values.n_rows - 1, i))
      {
        // It should be inserted.  Which position?
        size_t insertPosition = values.n_rows - 1;
        while (insertPosition > 0)
        {
          if (value <= values(insertPosition - 1, i))
            break; // The current value is the right one.
          insertPosition--;
        }

        // Now insert it into the list.
        InsertNeighbor(i, insertPosition, j, value, recommendations,
            values);
      }
    }

    // If we were not able to come up with enough recommendations, issue a
    // warning.
    if (recommendations(values.n_rows - 1, i) == cleanedData.n_rows + 1)
      Log::Warn << "Could not provide " << values.n_rows << " recommendations "
          << "for user " << users(i) << " (not enough un-rated items)!"
          << std::endl;
  }
}