void FastMKS<KernelType, TreeType>::Search(TreeType* queryTree,
                                           const size_t k,
                                           arma::Mat<size_t>& indices,
                                           arma::mat& kernels)
{
  // If either naive mode or single mode is specified, this must fail.
  if (naive || singleMode)
  {
    throw std::invalid_argument("can't call Search() with a query tree when "
        "single mode or naive search is enabled");
  }

  // No remapping will be necessary because we are using the cover tree.
  indices.set_size(k, queryTree->Dataset().n_cols);
  kernels.set_size(k, queryTree->Dataset().n_cols);
  kernels.fill(-DBL_MAX);

  Timer::Start("computing_products");
  typedef FastMKSRules<KernelType, TreeType> RuleType;
  RuleType rules(referenceSet, queryTree->Dataset(), indices, kernels,
      metric.Kernel());

  typename TreeType::template DualTreeTraverser<RuleType> traverser(rules);

  traverser.Traverse(*queryTree, *referenceTree);

  Log::Info << rules.BaseCases() << " base cases." << std::endl;
  Log::Info << rules.Scores() << " scores." << std::endl;

  Timer::Stop("computing_products");
}
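A hedged read-out note (not part of the original source): given the -DBL_MAX fill and the insertion logic in the overloads below, column q of indices/kernels holds the top-k results for query q, with the best match in row 0. A minimal sketch of reading the output:

// Print the best match per query after Search() returns; assumes 'indices'
// and 'kernels' were filled by one of the Search() overloads in this file.
for (size_t q = 0; q < indices.n_cols; ++q)
{
  Log::Info << "query " << q << ": best index " << indices(0, q)
            << ", kernel value " << kernels(0, q) << std::endl;
}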
void getEalpha_endorseIRT(const arma::mat &ystar,
                          const arma::mat &beta,
                          const arma::mat &theta,
                          const arma::mat &w,
                          const arma::mat &gamma,
                          const arma::mat &mu,
                          const arma::mat &sigma,
                          const int N,
                          const int J,
                          arma::mat &Ealpha,
                          arma::mat &Valpha,
                          const arma::mat &theta2,
                          const arma::mat &w2)
{
  // arma::mat Ealpha(J, 1);
  // Ealpha.fill(0.0);
  Valpha.fill(pow((N + 1 / sigma(0, 0)), -1));

#pragma omp parallel for
  for (int j = 0; j < J; j++)
  {
    double q1 = mu(0, 0) / sigma(0, 0);
    for (int n = 0; n < N; n++)
    {
      q1 = q1 + (ystar(n, j) - beta(n, 0)
          + gamma(0, 0) * (pow(theta(n, 0), 2)
                           - 2 * theta(n, 0) * w(j, 0)
                           + pow(w(j, 0), 2)));
    }
    Ealpha(j, 0) = Valpha(j, 0) * q1;
  }
  // return(Ealpha);
}
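Reading the loop above, the update being computed appears to be the conjugate normal form below; this is a reconstruction from the code itself, not from package documentation (theta2 and w2 are unused in this function):

  Valpha    = (N + 1/sigma)^(-1)
  Ealpha[j] = Valpha * ( mu/sigma
              + sum_{n=1..N} [ ystar(n,j) - beta(n) + gamma * (theta(n) - w(j))^2 ] )

since theta^2 - 2*theta*w + w^2 = (theta - w)^2.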
void LSHSearch<SortPolicy>::Search(const size_t k,
                                   arma::Mat<size_t>& resultingNeighbors,
                                   arma::mat& distances,
                                   const size_t numTablesToSearch)
{
  // Set the size of the neighbor and distance matrices.
  resultingNeighbors.set_size(k, querySet.n_cols);
  distances.set_size(k, querySet.n_cols);
  distances.fill(SortPolicy::WorstDistance());
  resultingNeighbors.fill(referenceSet.n_cols);

  size_t avgIndicesReturned = 0;

  Timer::Start("computing_neighbors");

  // Go through every query point sequentially.
  for (size_t i = 0; i < querySet.n_cols; i++)
  {
    // Hash every query into every hash table and eventually into the
    // 'secondHashTable' to obtain the neighbor candidates.
    arma::uvec refIndices;
    ReturnIndicesFromTable(i, refIndices, numTablesToSearch);

    // Informative book-keeping for the number of neighbor candidates returned
    // on average.
    avgIndicesReturned += refIndices.n_elem;

    // Sequentially go through all the candidates and save the best 'k'
    // candidates.
    for (size_t j = 0; j < refIndices.n_elem; j++)
      BaseCase(distances, resultingNeighbors, i, (size_t) refIndices[j]);
  }

  Timer::Stop("computing_neighbors");

  distanceEvaluations += avgIndicesReturned;
  avgIndicesReturned /= querySet.n_cols;
  Log::Info << avgIndicesReturned << " distinct indices returned on average."
      << std::endl;
}
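BaseCase() is not shown in this excerpt. Below is a minimal, self-contained sketch of the candidate-scan idea it implements, under the assumption of a Euclidean metric; the name candidateScan and its signature are illustrative, not part of LSHSearch:

#include <armadillo>

// Keep the k best (smallest-distance) candidates for one query point.
void candidateScan(const arma::mat& refSet, const arma::vec& query,
                   const arma::uvec& candidates, const size_t k,
                   arma::uvec& neighbors, arma::vec& dists)
{
  neighbors.set_size(k);
  neighbors.fill(refSet.n_cols);  // sentinel: "no neighbor found yet"
  dists.set_size(k);
  dists.fill(arma::datum::inf);   // worst possible distance

  for (size_t j = 0; j < candidates.n_elem; ++j)
  {
    const double d = arma::norm(query - refSet.col(candidates[j]), 2);

    // Find where this candidate belongs in the sorted best-k list.
    size_t pos = 0;
    while (pos < k && d >= dists[pos])
      ++pos;

    if (pos < k)
    {
      // Shift worse entries down one slot, then insert.
      for (size_t m = k - 1; m > pos; --m)
      {
        dists[m] = dists[m - 1];
        neighbors[m] = neighbors[m - 1];
      }
      dists[pos] = d;
      neighbors[pos] = candidates[j];
    }
  }
}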
////> Simulation functions ////

// [[Rcpp::export]]
List simulPotts_cpp(const S4& W_SR, const S4& W_LR, arma::mat sample,
                    const arma::mat& coords, const IntegerVector& site_order,
                    NumericVector rho, NumericVector distance_ref,
                    int iter_max, double cv_criterion,
                    bool regional, bool verbose)
{
  // Note: W_SR is read by rows, so if it has to be normalized, it must be
  // normalized by rows!

  //// initialization
  // progress diagnostics
  Progress testUser(verbose * CST_PROGRESS, verbose);
  double value_trace = 0;

  // variables
  int n = sample.n_rows;
  const int p = sample.n_cols;
  vector<int> W_i = W_SR.slot("i");
  vector<int> W_p = W_SR.slot("p");
  vector<double> W_x = W_SR.slot("x");

  IntegerVector rang(n);
  bool no_site_order = (site_order[0] < 0);
  if (no_site_order == false)
  {
    rang = site_order;
  }

  // int tirage_multinom[p]; and double proba_site[p]; would generate a
  // warning on linux (variable length arrays are a C99 feature)
  IntegerVector tirage_multinom(p);
  NumericVector proba_site(p);

  int index_px;
  arma::mat Wpred(n, p);
  double norm;

  // regional
  arma::mat V(n, p);
  std::fill(V.begin(), V.end(), 0);
  vector<double> sampleCol(n);
  List res_multipotentiel;

  // convergence diagnostics
  bool check_cv = (cv_criterion > 0);
  arma::mat proba_hist(check_cv * n, check_cv * p);
  double val_criterion = cv_criterion + 1;
  double test; // must be double, not bool: it stores a probability difference

  //// main loop
  for (int iter = 0; iter < iter_max; iter++)
  {
    // progress and interruption diagnostics
    if (verbose && iter >= value_trace)
    {
      value_trace = min(1.0 * iter_max, value_trace + iter_max / CST_PROGRESS);
      testUser.increment();
    }
    if (Progress::check_abort())
    {
      sample.fill(NA_REAL);
      V.fill(NA_REAL);
      return Rcpp::List::create(Rcpp::Named("simulation") = sample,
                                Rcpp::Named("V") = V,
                                Rcpp::Named("cv") = false);
    }

    // site order
    if (no_site_order)
    {
      rang = rank_hpp(runif(n)) - 1; // draw the site order at random
    }

    // regional potential, one call per class
    if (regional)
    {
      for (int iter_p = 0; iter_p < p; iter_p++)
      {
        for (int iter_obs = 0; iter_obs < n; iter_obs++)
        {
          sampleCol[iter_obs] = sample(iter_obs, iter_p); // colvec to vector<double>
        }
        res_multipotentiel = calcMultiPotential_hpp(W_SR, W_LR, sampleCol, 0.01,
            coords, Rcpp::as< std::vector<double> >(distance_ref),
            true, 10, 0.5);
        V.col(iter_p) = as<arma::vec>(res_multipotentiel[0]);
      }
    }

    if (check_cv)
    {
      val_criterion = 0; // reset: track the largest change over this sweep
    }

    for (int iter_px = 0; iter_px < n; iter_px++) // for each pixel
    {
      norm = 0.0;
      index_px = rang[iter_px];

      for (int iter_p = 0; iter_p < p; iter_p++) // for each group
      {
        // contribution of each neighbor to the density
        Wpred(index_px, iter_p) = 0.0;
        for (int iter_vois = W_p[index_px]; iter_vois < W_p[index_px + 1];
             iter_vois++)
        {
          Wpred(index_px, iter_p) += W_x[iter_vois] * sample(W_i[iter_vois], iter_p);
        }

        // exponential weighted by rho
        Wpred(index_px, iter_p) = exp(rho[0] * Wpred(index_px, iter_p)
                                      + rho[1] * V(index_px, iter_p));
        norm += Wpred(index_px, iter_p);
      }

      for (int iter_p = 0; iter_p < p; iter_p++)
      {
        proba_site[iter_p] = Wpred(index_px, iter_p) / norm;
        if (check_cv)
        {
          if (iter > 0)
          {
            test = std::abs(proba_hist(index_px, iter_p) - proba_site[iter_p]);
            if (test > val_criterion) { val_criterion = test; }
          }
          proba_hist(index_px, iter_p) = proba_site[iter_p];
        }
      }

      rmultinom(1, proba_site.begin(), p, tirage_multinom.begin());
      // rmultinom(1, proba_site, p, tirage_multinom); // alternative version
      //                                               // with the C99 VLA warning

      for (int iter_p = 0; iter_p < p; iter_p++)
      {
        sample(index_px, iter_p) = tirage_multinom[iter_p];
      }
    }

    if (check_cv && iter > 0 && val_criterion < cv_criterion)
    {
      break;
    }
  }

  // convergence status (logical NA when no criterion was tracked; a plain
  // bool cannot hold NA_REAL)
  Rcpp::LogicalVector test_cv(1);
  if (check_cv)
  {
    test_cv[0] = val_criterion < cv_criterion;
  }
  else
  {
    test_cv[0] = NA_LOGICAL;
  }

  // export
  return Rcpp::List::create(Rcpp::Named("simulation") = sample,
                            Rcpp::Named("V") = V,
                            Rcpp::Named("cv_criterion") = cv_criterion,
                            Rcpp::Named("cv") = test_cv);
}
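The pixel loop above amounts to the single-site Gibbs step sketched below: class probabilities are a softmax of rho[0] times the neighbor contribution plus rho[1] times the regional potential, followed by one multinomial draw. The function name, signature, and the std::mt19937 inverse-CDF draw are illustrative, not from the package (which uses R's rmultinom):

#include <armadillo>
#include <random>

// One draw from the local conditional of a Potts model.
arma::uword pottsSiteUpdate(const arma::rowvec& neighborSums, // per-class sum_j w_ij * x_j(g)
                            const arma::rowvec& V,            // per-class regional potential
                            const double rho0, const double rho1,
                            std::mt19937& rng)
{
  // Softmax of the local potential.
  arma::rowvec unnorm = arma::exp(rho0 * neighborSums + rho1 * V);
  arma::rowvec proba = unnorm / arma::accu(unnorm);

  // One multinomial draw via the inverse CDF.
  std::uniform_real_distribution<double> unif(0.0, 1.0);
  const double u = unif(rng);
  double cum = 0.0;
  for (arma::uword g = 0; g < proba.n_elem; ++g)
  {
    cum += proba[g];
    if (u <= cum)
      return g;
  }
  return proba.n_elem - 1; // guard against rounding
}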
tuple<double, double, int, int, double, double> simulate(
    const arma::Col<double> &Y, const vector<int> X, double sigma,
    bool varianceKnown, arma::mat &Z, mt19937_64 &rng, bool interceptTerm)
{
  bernoulli_distribution bernoulli(0.5);
  int N = X.size();
  Z.fill(0);

  // bestColumns[k] keeps track of the k + 1 or k + 2 columns that produce the
  // smallest p-value, depending on interceptTerm.
  vector<arma::uvec> bestColumns;
  bestColumns.reserve(N - 1);
  if (interceptTerm)
  {
    // Make the intercept term the last column of Z.
    fill(Z.begin_col(N - 1), Z.end_col(N - 1), 1);
    copy(X.begin(), X.end(), Z.begin_col(0));
    bestColumns.push_back(arma::uvec{0, (unsigned long long) N - 1ULL});
  }
  else
  {
    copy(X.begin(), X.end(), Z.begin_col(0));
    bestColumns.push_back(arma::uvec{0});
  }

  // bestPValues[k] corresponds to the p-value if the columns bestColumns[k]
  // are used.
  vector<pair<double, double>> bestPValues;
  bestPValues.reserve(N - 1);
  bestPValues.push_back(calculateBetaPValue(Z.cols(bestColumns.front()), Y,
                                            sigma, varianceKnown));
  if (bestPValues.front().first <= 0.05)
  {
    return make_tuple(bestPValues.front().first, bestPValues.front().second,
                      0, 0, -1, bestPValues.front().first);
  }
  else
  {
    // We need more covariates.
    bool done = false;
    int smallestSubsetSize = INT_MAX;
    /* Add covariates one-by-one; we always include the treatment, and if
     * we're using the intercept, two covariates are included by default. */
    for (int j = 1; j < N - 2 || (j == N - 2 && !interceptTerm); ++j)
    {
      for (int k = 0; k < N; ++k) Z(k, j) = bernoulli(rng);
      if (!interceptTerm)
      {
        while (arma::rank(Z) <= j)
        {
          for (int k = 0; k < N; ++k) Z(k, j) = bernoulli(rng);
        }
      }
      else
      {
        // Offset rank by 1 for the intercept term.
        while (arma::rank(Z) <= j + 1)
        {
          for (int k = 0; k < N; ++k) Z(k, j) = bernoulli(rng);
        }
      }

      // Loop through subset sizes; k is the number of additional covariates.
      for (int k = j; k >= 1; --k)
      {
        pair<double, double> newPValue;
        if (k == j)
        {
          // Use all available covariates: add one more to the biggest subset.
          bestColumns.emplace_back(bestColumns.back().n_rows + 1);
          for (size_t l = 0; l < bestColumns.back().n_rows - 1; ++l)
          {
            bestColumns.back()(l) = bestColumns[j - 1](l); // copy from original subset
          }
          bestColumns.back()(bestColumns.back().n_rows - 1) = j; // add new covariate
          newPValue = calculateBetaPValue(Z.cols(bestColumns.back()), Y, sigma,
                                          varianceKnown);
          bestPValues.push_back(newPValue);
        }
        else
        {
          // Make a new subset of the same size including the new covariate.
          arma::uvec columnSubset(bestColumns[k].n_rows);
          for (size_t l = 0; l < columnSubset.n_rows - 1; ++l)
            columnSubset(l) = bestColumns[k - 1](l); // copy from smaller subset
          columnSubset(columnSubset.n_rows - 1) = j; // add new covariate
          newPValue = calculateBetaPValue(Z.cols(columnSubset), Y, sigma,
                                          varianceKnown);
          if (bestPValues[k].first > newPValue.first)
          {
            // A better subset; replace.
            bestPValues[k] = newPValue;
            bestColumns[k] = columnSubset;
          }
        }
        if (newPValue.first <= 0.05)
        {
          // Stop when we reach significance.
          done = true;
          smallestSubsetSize = k;
        }
      }

      if (done)
      {
        // Compute the balance p-value in the special case that only one
        // covariate was needed.
        double balancePValue = -1;
        if (smallestSubsetSize == 1 && !interceptTerm)
        {
          balancePValue = testBalance(Z.col(bestColumns[1](1)), Z.col(0));
        }
        else if (smallestSubsetSize == 1 && interceptTerm)
        {
          balancePValue = testBalance(Z.col(bestColumns[1](2)), Z.col(0));
        }
        return make_tuple(bestPValues.front().first,
                          bestPValues[smallestSubsetSize].second,
                          j, smallestSubsetSize, balancePValue,
                          bestPValues[smallestSubsetSize].first);
      }
    }
  }
  return make_tuple(bestPValues.front().first,
                    bestPValues.front().second, -1, -1, -1,
                    bestPValues.front().first);
}
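A hypothetical driver for simulate(); calculateBetaPValue and testBalance are assumed to be defined elsewhere in this codebase, and all setup values are illustrative:

#include <armadillo>
#include <random>
#include <tuple>
#include <vector>

std::mt19937_64 rng(42);
const int N = 100;
std::vector<int> X(N);
std::bernoulli_distribution treat(0.5);
for (int i = 0; i < N; ++i)
  X[i] = treat(rng);                                      // random treatment assignment
arma::Col<double> Y = arma::randn<arma::Col<double>>(N);  // placeholder outcome
arma::mat Z(N, N, arma::fill::zeros);                     // covariate matrix filled by simulate()
auto res = simulate(Y, X, /* sigma */ 1.0, /* varianceKnown */ true, Z, rng,
                    /* interceptTerm */ false);
double firstPValue = std::get<0>(res);    // p-value with the treatment alone
int covariatesGenerated = std::get<2>(res);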
void FastMKS<KernelType, TreeType>::Search(
    const typename TreeType::Mat& querySet,
    const size_t k,
    arma::Mat<size_t>& indices,
    arma::mat& kernels)
{
  Timer::Start("computing_products");

  // No remapping will be necessary because we are using the cover tree.
  indices.set_size(k, querySet.n_cols);
  kernels.set_size(k, querySet.n_cols);

  // Naive implementation.
  if (naive)
  {
    // Fill kernels.
    kernels.fill(-DBL_MAX);

    // Simple double loop.  Stupid, slow, but a good benchmark.
    for (size_t q = 0; q < querySet.n_cols; ++q)
    {
      for (size_t r = 0; r < referenceSet.n_cols; ++r)
      {
        const double eval = metric.Kernel().Evaluate(querySet.col(q),
                                                     referenceSet.col(r));

        size_t insertPosition;
        for (insertPosition = 0; insertPosition < indices.n_rows;
             ++insertPosition)
          if (eval > kernels(insertPosition, q))
            break;

        if (insertPosition < indices.n_rows)
          InsertNeighbor(indices, kernels, q, insertPosition, r, eval);
      }
    }

    Timer::Stop("computing_products");
    return;
  }

  // Single-tree implementation.
  if (singleMode)
  {
    // Fill kernels.
    kernels.fill(-DBL_MAX);

    // Create rules object (this will store the results).  This constructor
    // precalculates each self-kernel value.
    typedef FastMKSRules<KernelType, TreeType> RuleType;
    RuleType rules(referenceSet, querySet, indices, kernels, metric.Kernel());

    typename TreeType::template SingleTreeTraverser<RuleType> traverser(rules);

    for (size_t i = 0; i < querySet.n_cols; ++i)
      traverser.Traverse(i, *referenceTree);

    Log::Info << rules.BaseCases() << " base cases." << std::endl;
    Log::Info << rules.Scores() << " scores." << std::endl;

    Timer::Stop("computing_products");
    return;
  }

  // Dual-tree implementation.  First, we need to build the query tree; we
  // assume it doesn't map anything.
  Timer::Stop("computing_products");
  Timer::Start("tree_building");
  TreeType queryTree(querySet);
  Timer::Stop("tree_building");

  Search(&queryTree, k, indices, kernels);
}
void FastMKS<KernelType, TreeType>::Search(const size_t k,
                                           arma::Mat<size_t>& indices,
                                           arma::mat& kernels)
{
  // No remapping will be necessary because we are using the cover tree.
  Timer::Start("computing_products");
  indices.set_size(k, referenceSet.n_cols);
  kernels.set_size(k, referenceSet.n_cols);
  kernels.fill(-DBL_MAX);

  // Naive implementation.
  if (naive)
  {
    // Simple double loop.  Stupid, slow, but a good benchmark.
    for (size_t q = 0; q < referenceSet.n_cols; ++q)
    {
      for (size_t r = 0; r < referenceSet.n_cols; ++r)
      {
        if (q == r)
          continue; // Don't return the point as its own candidate.

        const double eval = metric.Kernel().Evaluate(referenceSet.col(q),
                                                     referenceSet.col(r));

        size_t insertPosition;
        for (insertPosition = 0; insertPosition < indices.n_rows;
             ++insertPosition)
          if (eval > kernels(insertPosition, q))
            break;

        if (insertPosition < indices.n_rows)
          InsertNeighbor(indices, kernels, q, insertPosition, r, eval);
      }
    }

    Timer::Stop("computing_products");
    return;
  }

  // Single-tree implementation.
  if (singleMode)
  {
    // Create rules object (this will store the results).  This constructor
    // precalculates each self-kernel value.
    typedef FastMKSRules<KernelType, TreeType> RuleType;
    RuleType rules(referenceSet, referenceSet, indices, kernels,
        metric.Kernel());

    typename TreeType::template SingleTreeTraverser<RuleType> traverser(rules);

    for (size_t i = 0; i < referenceSet.n_cols; ++i)
      traverser.Traverse(i, *referenceTree);

    // Save the number of pruned nodes.
    const size_t numPrunes = traverser.NumPrunes();
    Log::Info << "Pruned " << numPrunes << " nodes." << std::endl;

    Log::Info << rules.BaseCases() << " base cases." << std::endl;
    Log::Info << rules.Scores() << " scores." << std::endl;

    Timer::Stop("computing_products");
    return;
  }

  // Dual-tree implementation.
  Timer::Stop("computing_products");

  Search(referenceTree, k, indices, kernels);
}
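A hypothetical call sequence for the overloads above; the reference-set constructor and the concrete KernelType/TreeType arguments are assumptions, since neither appears in this excerpt:

// All types and the constructor here are assumed for illustration only.
arma::mat refData = arma::randu<arma::mat>(20, 500);  // 20 dims, 500 points
FastMKS<SomeKernel, SomeCoverTree> mks(refData);      // hypothetical instantiation
arma::Mat<size_t> indices;
arma::mat kernels;
mks.Search(5, indices, kernels); // monochromatic search: top-5 kernel values
                                 // per reference point, excluding the point itself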