Example #1
void FastMKS<KernelType, TreeType>::Search(TreeType* queryTree,
                                           const size_t k,
                                           arma::Mat<size_t>& indices,
                                           arma::mat& kernels)
{
  // If either naive mode or single mode is specified, this must fail.
  if (naive || singleMode)
  {
    throw std::invalid_argument("can't call Search() with a query tree when "
        "single mode or naive search is enabled");
  }

  // No remapping will be necessary because we are using the cover tree.
  indices.set_size(k, queryTree->Dataset().n_cols);
  kernels.set_size(k, queryTree->Dataset().n_cols);
  kernels.fill(-DBL_MAX);

  Timer::Start("computing_products");
  typedef FastMKSRules<KernelType, TreeType> RuleType;
  RuleType rules(referenceSet, queryTree->Dataset(), indices, kernels,
      metric.Kernel());

  typename TreeType::template DualTreeTraverser<RuleType> traverser(rules);

  traverser.Traverse(*queryTree, *referenceTree);

  Log::Info << rules.BaseCases() << " base cases." << std::endl;
  Log::Info << rules.Scores() << " scores." << std::endl;

  Timer::Stop("computing_products");
}
Example #2
void getEalpha_endorseIRT (const arma::mat &ystar,
                           const arma::mat &beta,
                           const arma::mat &theta,
                           const arma::mat &w,
                           const arma::mat &gamma,
                           const arma::mat &mu,
                           const arma::mat &sigma,
                           const int N,
                           const int J,
                           arma::mat &Ealpha,
                           arma::mat &Valpha,
                           const arma::mat &theta2,
                           const arma::mat &w2
                           ) {
    // arma::mat Ealpha(J, 1) ;
    // Ealpha.fill(0.0) ;

    // Common variance term (N + 1/sigma)^{-1}, identical for every item j.
    Valpha.fill(pow((N + 1/sigma(0, 0)), -1)) ;

#pragma omp parallel for
    for (int j = 0 ; j < J ; j++) {

        double q1 = mu(0, 0) / sigma(0, 0) ;  // prior contribution mu/sigma

        for (int n = 0 ; n < N ; n++) {

            // data contribution: ystar_{nj} - beta_n + gamma * (theta_n - w_j)^2, expanded below
            q1 = q1 +
                (ystar(n, j) -
                 beta(n, 0) +
                 gamma(0, 0) * (pow(theta(n, 0), 2)
                                - 2 * theta(n, 0) * w(j, 0)
                                + pow(w(j, 0), 2)
                                )
                 ) ;

        }

        Ealpha(j, 0) = Valpha(j, 0) * (q1) ;
    }

    // return(Ealpha) ;
}
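Written out, the loop above is a direct transcription of the conditional posterior moments of the item parameter alpha_j; reading mu and sigma as the prior mean and variance of alpha (an interpretation inferred from the update, not stated in the snippet), the quantities computed are

V_\alpha = \left( N + \frac{1}{\sigma} \right)^{-1},
\qquad
\mathbb{E}[\alpha_j] = V_\alpha \left( \frac{\mu}{\sigma} + \sum_{n=1}^{N} \left[ y^{*}_{nj} - \beta_n + \gamma \, (\theta_n - w_j)^2 \right] \right),

with the squared term kept in its expanded form inside the OpenMP loop. The theta2 and w2 arguments are not used in this snippet.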
Example #3
void LSHSearch<SortPolicy>::
Search(const size_t k,
       arma::Mat<size_t>& resultingNeighbors,
       arma::mat& distances,
       const size_t numTablesToSearch)
{
  // Set the size of the neighbor and distance matrices.
  resultingNeighbors.set_size(k, querySet.n_cols);
  distances.set_size(k, querySet.n_cols);
  distances.fill(SortPolicy::WorstDistance());
  resultingNeighbors.fill(referenceSet.n_cols);

  size_t avgIndicesReturned = 0;

  Timer::Start("computing_neighbors");

  // Go through every query point sequentially.
  for (size_t i = 0; i < querySet.n_cols; i++)
  {
    // Hash every query into every hash table and eventually into the
    // 'secondHashTable' to obtain the neighbor candidates.
    arma::uvec refIndices;
    ReturnIndicesFromTable(i, refIndices, numTablesToSearch);

    // Bookkeeping: accumulate the number of candidate neighbors returned, so
    // that the average can be reported at the end.
    avgIndicesReturned += refIndices.n_elem;

    // Sequentially go through all the candidates and save the best 'k'
    // candidates.
    for (size_t j = 0; j < refIndices.n_elem; j++)
      BaseCase(distances, resultingNeighbors, i, (size_t) refIndices[j]);
  }

  Timer::Stop("computing_neighbors");

  distanceEvaluations += avgIndicesReturned;
  avgIndicesReturned /= querySet.n_cols;
  Log::Info << avgIndicesReturned << " distinct indices returned on average." <<
      std::endl;
}
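A minimal calling sketch for the method above, assuming the older mlpack API in which LSHSearch is constructed from a reference set and a query set together with the number of projections and hash tables; the constructor arguments, the header path, and the meaning of numTablesToSearch = 0 ("use all tables") are assumptions to check against the mlpack version in use.

#include <mlpack/core.hpp>
#include <mlpack/methods/lsh/lsh_search.hpp>

using namespace mlpack::neighbor;

int main()
{
  arma::mat referenceData(10, 1000, arma::fill::randu); // columns are points
  arma::mat queryData(10, 100, arma::fill::randu);

  // Assumed constructor: 10 projections per hash table, 30 tables.
  LSHSearch<> lsh(referenceData, queryData, 10, 30);

  arma::Mat<size_t> neighbors;
  arma::mat distances;

  // k = 5 approximate nearest neighbors; 0 is assumed to mean "search all tables".
  lsh.Search(5, neighbors, distances, 0);

  return 0;
}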
Example #4
////> Simulation functions ////
// [[Rcpp::export]]
List simulPotts_cpp (const S4& W_SR, const S4& W_LR, arma::mat sample, const arma::mat& coords, 
                     const IntegerVector& site_order, 
                     NumericVector rho, NumericVector distance_ref, 
                     int iter_max, double cv_criterion, bool regional, bool verbose){
  
   // note: W_SR is read row by row, so if it has to be normalized, the normalization must be by rows!
  
  //// initialization
  
  // diagnostic progress
  Progress testUser(verbose *CST_PROGRESS, verbose);
  double value_trace = 0 ;
  
  // variables
  int n = sample.n_rows;
  const int p = sample.n_cols;
  vector < int > W_i = W_SR.slot("i");
  vector < int > W_p = W_SR.slot("p");
  vector < double > W_x = W_SR.slot("x");
  
  IntegerVector rang(n);
  bool no_site_order = (site_order[0] < 0);
  if(no_site_order == false){
    rang = site_order;
  }
  IntegerVector tirage_multinom(p); // int tirage_multinom[p]; //  (generate a warning on linux : warning variable length arrays are a C99 feature)
  NumericVector proba_site(p); //double proba_site[p]; // (generate a warning on linux : warning variable length arrays are a C99 feature)
  
  int index_px;
  arma::mat Wpred(n, p);
  double norm;
  
  // regional
  arma::mat V(n, p);
  std::fill(V.begin(), V.end(), 0);
  vector < double > sampleCol(n);
  List res_multipotentiel ;
  
  // diagnostic convergence
  bool check_cv = (cv_criterion > 0);
  arma::mat proba_hist(check_cv *n, check_cv *p);
  double val_criterion = cv_criterion + 1 ;
  double test; // absolute change in a site/group probability between sweeps
  
  //// main loop
  for(int iter = 0; iter < iter_max ; iter++){
    
    // diagnostic
    if(verbose && iter>=value_trace){
      value_trace = min(1.0 *iter_max, value_trace + iter_max / CST_PROGRESS);
      testUser.increment();
    }
    if (Progress::check_abort() ){
      sample.fill(NA_REAL);
      V.fill(NA_REAL);
      return Rcpp::List::create(Rcpp::Named("simulation") = sample, 
                                Rcpp::Named("V") = V, 
                                Rcpp::Named("cv") = false
      );
    }

    // sort site order
    if(no_site_order){
      rang = rank_hpp(runif(n)) - 1; // draw the site order at random
    }
    
    if(regional){   // regional potential
      for(int iter_p = 0 ; iter_p < p ; iter_p++){
        
        for(int iter_obs = 0 ; iter_obs < n ; iter_obs++){
          sampleCol[iter_obs] = sample(iter_obs, iter_p); // colvec to vector < double > 
        }
        
        for(int iter_px = 0 ; iter_px < n ; iter_px++){
          
          res_multipotentiel =  calcMultiPotential_hpp(W_SR, W_LR,  sampleCol, 0.01, 
                                                       coords, Rcpp::as < std::vector < double > >(distance_ref), 
                                                       true, 10, 0.5);  
          
          V.col(iter_p) = as < arma::vec >(res_multipotentiel[0]);
        }
      }
    }

    // reset the convergence criterion for this sweep
    if(check_cv && iter > 0){val_criterion = 0;}

    for(int iter_px = 0 ; iter_px < n ; iter_px++){ // for each pixel
      
      norm = 0.0;
      index_px = rang[iter_px];
      
      for(int iter_p = 0 ; iter_p < p ; iter_p++){ // for each group
        
        Wpred(index_px, iter_p) = 0.0;
        
        // contribution of each neighbor to the density
        for(int iter_vois = W_p[index_px]; iter_vois < W_p[index_px + 1]; iter_vois++){
          
          Wpred(index_px, iter_p) += W_x[iter_vois] *sample(W_i[iter_vois], iter_p);
          
        }
        
        // exponentiate: rho[0] weights the neighborhood term, rho[1] the regional potential V
        Wpred(index_px, iter_p) = exp(rho[0] * Wpred(index_px, iter_p) + rho[1] *V(index_px, iter_p)) ;
        norm = norm + Wpred(index_px, iter_p);
      } 
      
      for(int iter_p = 0; iter_p < p; iter_p++){
        proba_site[iter_p] = Wpred(index_px, iter_p) / norm;
        
        if(check_cv){
          if(iter > 0){
            // track convergence by pixel index so that sweeps with different
            // site orders remain comparable
            test = std::abs(proba_hist(index_px, iter_p) - proba_site[iter_p]) ;
            if(test > val_criterion){val_criterion = test;}
          }
          proba_hist(index_px, iter_p) = proba_site[iter_p];
        }
      } 
      rmultinom(1, proba_site.begin(), p, tirage_multinom.begin()); // rmultinom(1, proba_site, p, tirage_multinom); //  (alternative version with the warning)
      
      for(int iter_p = 0; iter_p < p; iter_p++){
        sample(index_px, iter_p) = tirage_multinom[iter_p];
      }
      
    }
    
    if(check_cv){
      if(val_criterion < cv_criterion){break;}
    }
  }
  
  // convergence status
  Rcpp::LogicalVector test_cv(1);
  if(check_cv){
    test_cv[0] = (val_criterion < cv_criterion);
  }else{
    test_cv[0] = NA_LOGICAL;
  }

  // export
  return Rcpp::List::create(Rcpp::Named("simulation") = sample, 
                            Rcpp::Named("V") = V, 
                            Rcpp::Named("cv_criterion") = cv_criterion, 
                            Rcpp::Named("cv") = test_cv
  );
}
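For reference, the pixel loop above performs the usual Gibbs update of a (regionally penalized) Potts model: reading directly from the construction of Wpred and the rmultinom draw, each site i is resampled from

P(Z_i = k \mid Z_{-i}) \propto \exp\!\left( \rho_1 \sum_{j \in \mathcal{N}(i)} W_{ij} \, Z_{jk} + \rho_2 \, V_{ik} \right),

where Z_{jk} = sample(j, k) is the current indicator that neighbor j belongs to group k, W is the sparse weight matrix W_SR, V is the regional potential, and rho_1, rho_2 are rho[0] and rho[1].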
Example #5
tuple<double, double, int, int, double, double> simulate(const arma::Col<double> &Y, const vector<int> X, 
                                                         double sigma, bool varianceKnown,
                                                         arma::mat &Z, mt19937_64 &rng,
                                                         bool interceptTerm) {
  bernoulli_distribution bernoulli(0.5);
  int N = X.size();
  Z.fill(0);
  // bestColumns[k] keeps track of the k + 1 or k + 2 columns that produce the smallest p-value depending on interceptTerm
  vector<arma::uvec> bestColumns; bestColumns.reserve(N - 1); 
  if (interceptTerm) { // make intercept term last column of Z
    fill(Z.begin_col(N - 1), Z.end_col(N - 1), 1);
    copy(X.begin(), X.end(), Z.begin_col(0));
    bestColumns.push_back(arma::uvec{0, (unsigned long long) N - 1ULL}); 
  } else {
    copy(X.begin(), X.end(), Z.begin_col(0));
    bestColumns.push_back(arma::uvec{0});     
  }  
  // bestPValues[k] corresponds to p-value if the columns bestColumns[k] are used
  vector<pair<double, double>> bestPValues; bestPValues.reserve(N - 1);
  bestPValues.push_back(calculateBetaPValue(Z.cols(bestColumns.front()), Y, sigma, varianceKnown));
  if (bestPValues.front().first <= 0.05) {
    return make_tuple(bestPValues.front().first, bestPValues.front().second, 0, 0, -1, bestPValues.front().first);
  } else {                    // need more covariates
    bool done = false;
    int smallestSubsetSize = INT_MAX;
    /* add covariates one-by-one, we always include the treatment
     * if we're using the intercept two covariates are included by default
     */
    for (int j = 1; j < N - 2 || (j == N - 2 && !interceptTerm); ++j) { 
      for (int k = 0; k < N; ++k) Z(k, j) = bernoulli(rng);
      if (!interceptTerm) {
        while (arma::rank(Z) <= j) {
          for (int k = 0; k < N; ++k) Z(k, j) = bernoulli(rng);
        }        
      } else { // offset rank by 1 for intercept term
        while (arma::rank(Z) <= j + 1) {
          for (int k = 0; k < N; ++k) Z(k, j) = bernoulli(rng);
        }        
      }
      for (int k = j; k >= 1; --k) { // loop through subset sizes, k is the number of additional covariates
        pair<double, double> newPValue;
        if (k == j) {           // use all available covariates
          bestColumns.emplace_back(bestColumns.back().n_rows + 1); // add one more to biggest subset
          for (int l = 0; l < bestColumns.back().n_rows - 1; ++l) { 
            bestColumns.back()(l) = bestColumns[j - 1](l); // copy over from original subset
          }
          bestColumns.back()(bestColumns.back().n_rows - 1) = j; // add new covariate
          newPValue = calculateBetaPValue(Z.cols(bestColumns.back()), Y, sigma, varianceKnown);
          bestPValues.push_back(newPValue);
        } else {                // make a new subset of same size with new covariate
          arma::uvec columnSubset(bestColumns[k].n_rows); 
          for (int l = 0; l < columnSubset.n_rows - 1; ++l) 
            columnSubset(l) = bestColumns[k - 1](l); // copy over from smaller subset
          columnSubset(columnSubset.n_rows - 1) = j; // add new covariate
          newPValue = calculateBetaPValue(Z.cols(columnSubset), Y, sigma, varianceKnown);
          if (bestPValues[k].first > newPValue.first) { // if better subset replace
            bestPValues[k] = newPValue;
            bestColumns[k] = columnSubset;
          }
        }
        if (newPValue.first <= 0.05) { // stop when we reach significance
          done = true;
          smallestSubsetSize = k;
        }
      }
      if (done) {
        // compute balance p value in special case that only 1 covariate was needed
        double balancePValue = -1;
        if (smallestSubsetSize == 1 && !interceptTerm) {
          balancePValue = testBalance(Z.col(bestColumns[1](1)), Z.col(0));
        } else if (smallestSubsetSize == 1 && interceptTerm) {
          balancePValue = testBalance(Z.col(bestColumns[1](2)), Z.col(0));
        }
        return make_tuple(bestPValues.front().first, bestPValues[smallestSubsetSize].second, 
                          j, smallestSubsetSize, balancePValue, bestPValues[smallestSubsetSize].first); 
      }
    }    
  }  
  return make_tuple(bestPValues.front().first, bestPValues.front().second, -1, -1, -1, bestPValues.front().first);
}
Example #6
void FastMKS<KernelType, TreeType>::Search(
    const typename TreeType::Mat& querySet,
    const size_t k,
    arma::Mat<size_t>& indices,
    arma::mat& kernels)
{
  Timer::Start("computing_products");

  // No remapping will be necessary because we are using the cover tree.
  indices.set_size(k, querySet.n_cols);
  kernels.set_size(k, querySet.n_cols);

  // Naive implementation.
  if (naive)
  {
    // Fill kernels.
    kernels.fill(-DBL_MAX);

    // Simple double loop.  Stupid, slow, but a good benchmark.
    for (size_t q = 0; q < querySet.n_cols; ++q)
    {
      for (size_t r = 0; r < referenceSet.n_cols; ++r)
      {
        const double eval = metric.Kernel().Evaluate(querySet.col(q),
                                                     referenceSet.col(r));

        size_t insertPosition;
        for (insertPosition = 0; insertPosition < indices.n_rows;
            ++insertPosition)
          if (eval > kernels(insertPosition, q))
            break;

        if (insertPosition < indices.n_rows)
          InsertNeighbor(indices, kernels, q, insertPosition, r, eval);
      }
    }

    Timer::Stop("computing_products");

    return;
  }

  // Single-tree implementation.
  if (singleMode)
  {
    // Fill kernels.
    kernels.fill(-DBL_MAX);

    // Create rules object (this will store the results).  This constructor
    // precalculates each self-kernel value.
    typedef FastMKSRules<KernelType, TreeType> RuleType;
    RuleType rules(referenceSet, querySet, indices, kernels, metric.Kernel());

    typename TreeType::template SingleTreeTraverser<RuleType> traverser(rules);

    for (size_t i = 0; i < querySet.n_cols; ++i)
      traverser.Traverse(i, *referenceTree);

    Log::Info << rules.BaseCases() << " base cases." << std::endl;
    Log::Info << rules.Scores() << " scores." << std::endl;

    Timer::Stop("computing_products");
    return;
  }

  // Dual-tree implementation.  First, we need to build the query tree.  We are
  // assuming it doesn't map anything...
  Timer::Stop("computing_products");
  Timer::Start("tree_building");
  TreeType queryTree(querySet);
  Timer::Stop("tree_building");

  Search(&queryTree, k, indices, kernels);
}
Example #7
void FastMKS<KernelType, TreeType>::Search(const size_t k,
                                           arma::Mat<size_t>& indices,
                                           arma::mat& kernels)
{
  // No remapping will be necessary because we are using the cover tree.
  Timer::Start("computing_products");
  indices.set_size(k, referenceSet.n_cols);
  kernels.set_size(k, referenceSet.n_cols);
  kernels.fill(-DBL_MAX);

  // Naive implementation.
  if (naive)
  {
    // Simple double loop.  Stupid, slow, but a good benchmark.
    for (size_t q = 0; q < referenceSet.n_cols; ++q)
    {
      for (size_t r = 0; r < referenceSet.n_cols; ++r)
      {
        if (q == r)
          continue; // Don't return the point as its own candidate.

        const double eval = metric.Kernel().Evaluate(referenceSet.col(q),
                                                     referenceSet.col(r));

        size_t insertPosition;
        for (insertPosition = 0; insertPosition < indices.n_rows;
            ++insertPosition)
          if (eval > kernels(insertPosition, q))
            break;

        if (insertPosition < indices.n_rows)
          InsertNeighbor(indices, kernels, q, insertPosition, r, eval);
      }
    }

    Timer::Stop("computing_products");

    return;
  }

  // Single-tree implementation.
  if (singleMode)
  {
    // Create rules object (this will store the results).  This constructor
    // precalculates each self-kernel value.
    typedef FastMKSRules<KernelType, TreeType> RuleType;
    RuleType rules(referenceSet, referenceSet, indices, kernels,
        metric.Kernel());

    typename TreeType::template SingleTreeTraverser<RuleType> traverser(rules);

    for (size_t i = 0; i < referenceSet.n_cols; ++i)
      traverser.Traverse(i, *referenceTree);

    // Save the number of pruned nodes.
    const size_t numPrunes = traverser.NumPrunes();

    Log::Info << "Pruned " << numPrunes << " nodes." << std::endl;

    Log::Info << rules.BaseCases() << " base cases." << std::endl;
    Log::Info << rules.Scores() << " scores." << std::endl;

    Timer::Stop("computing_products");
    return;
  }

  // Dual-tree implementation.
  Timer::Stop("computing_products");

  Search(referenceTree, k, indices, kernels);
}
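A minimal usage sketch tying the three Search() overloads above together. The constructor shown (building the FastMKS object, and hence the reference cover tree, directly from the data with the default linear kernel) is an assumption about the surrounding class and may differ between mlpack versions.

#include <mlpack/core.hpp>
#include <mlpack/methods/fastmks/fastmks.hpp>

using namespace mlpack::fastmks;
using namespace mlpack::kernel;

int main()
{
  arma::mat referenceData(10, 1000, arma::fill::randu); // columns are points
  arma::mat queryData(10, 100, arma::fill::randu);

  // Assumed constructor: builds the reference tree from the data.
  FastMKS<LinearKernel> fastmks(referenceData);

  arma::Mat<size_t> indices;
  arma::mat kernels;

  // Monochromatic search (overload above): the k largest kernel values among
  // the reference points themselves, excluding each point as its own candidate.
  fastmks.Search(5, indices, kernels);

  // Bichromatic search: the overload taking a query set, which falls back to
  // the naive or single-tree code paths when those modes are enabled.
  fastmks.Search(queryData, 5, indices, kernels);

  return 0;
}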