/**
 * Calculates and stores the gradient values given a set of parameters.
 */
void SoftmaxRegressionFunction::Gradient(const arma::mat& parameters,
                                         arma::mat& gradient) const
{
  // Calculate the class probabilities for each training example. The
  // probabilities for each of the classes are given by:
  //   p_j = exp(theta_j' * x_i) / sum(exp(theta_k' * x_i))
  // The sum is calculated over all the classes.
  // x_i is the input vector for a particular training example.
  // theta_j is the parameter vector associated with a particular class.
  arma::mat probabilities;
  GetProbabilitiesMatrix(parameters, probabilities);

  // Calculate the parameter gradients.
  gradient.set_size(parameters.n_rows, parameters.n_cols);
  if (fitIntercept)
  {
    // Treating the intercept term parameters.col(0) separately to avoid
    // the cost of building the matrix [1; data].
    arma::mat inner = probabilities - groundTruth;

    gradient.col(0) =
        inner * arma::ones<arma::mat>(data.n_cols, 1) / data.n_cols +
        lambda * parameters.col(0);

    gradient.cols(1, parameters.n_cols - 1) =
        inner * data.t() / data.n_cols +
        lambda * parameters.cols(1, parameters.n_cols - 1);
  }
  else
  {
    gradient = (probabilities - groundTruth) * data.t() / data.n_cols +
        lambda * parameters;
  }
}
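// A standalone hedged sketch (not mlpack code) of the same computation on toy
// data, mirroring the no-intercept branch above; X, Y, theta, and lambda are
// illustrative names.
#include <armadillo>

int main()
{
  arma::mat X = arma::randu<arma::mat>(3, 10);     // 3 features, 10 examples
  arma::mat theta = arma::randu<arma::mat>(4, 3);  // 4 classes
  arma::mat Y = arma::zeros(4, 10);                // one-hot ground truth
  for (arma::uword i = 0; i < 10; ++i)
    Y(i % 4, i) = 1.0;
  const double lambda = 0.01;

  arma::mat H = arma::exp(theta * X);                     // unnormalised scores
  arma::mat P = H / arma::repmat(arma::sum(H, 0), 4, 1);  // column-wise softmax
  arma::mat gradient = (P - Y) * X.t() / X.n_cols + lambda * theta;
  gradient.print("gradient:");
  return 0;
}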
void subspaceIdMoor::buildNMatrix(arma::uword k, arma::mat const &M,
                                  arma::mat const &L1, arma::mat const &L2,
                                  arma::mat const &X, arma::uword i,
                                  arma::uword n, arma::uword ny, arma::mat &N)
{
  mat Upper, Lower;
  Upper = join_horiz(M.cols((k - 1)*ny, ny*i - 1) - L1.cols((k - 1)*ny, ny*i - 1),
                     zeros(n, (k - 1)*ny));
  Lower = join_horiz(-L2.cols((k - 1)*ny, ny*i - 1), zeros(ny, (k - 1)*ny));
  N = join_vert(Upper, Lower);
  if (k == 1)
    N.submat(n, 0, n + ny - 1, ny - 1) = eye(ny, ny) +
        N.submat(n, 0, n + ny - 1, ny - 1);
  N = N * X;
}
/**
 * Evaluate the probabilities matrix. If the fitIntercept flag is true,
 * it should consider the parameters.col(0) intercept term.
 */
void SoftmaxRegressionFunction::GetProbabilitiesMatrix(
    const arma::mat& parameters,
    arma::mat& probabilities) const
{
  arma::mat hypothesis;

  if (fitIntercept)
  {
    // In order to add the intercept term, we would compute the following:
    //   [1; data] = arma::join_cols(ones(1, data.n_cols), data)
    //   hypothesis = arma::exp(parameters * [1; data]).
    //
    // Since the cost of the join may be high due to the copy of the original
    // data, split the hypothesis computation into two components.
    hypothesis = arma::exp(arma::repmat(parameters.col(0), 1, data.n_cols) +
        parameters.cols(1, parameters.n_cols - 1) * data);
  }
  else
  {
    hypothesis = arma::exp(parameters * data);
  }

  probabilities = hypothesis /
      arma::repmat(arma::sum(hypothesis, 0), numClasses, 1);
}
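// A hedged toy check (illustrative, not mlpack code) that the split
// computation above matches the explicit [1; data] join:
#include <armadillo>
#include <iostream>

int main()
{
  arma::mat data = arma::randu<arma::mat>(3, 5);
  arma::mat parameters = arma::randu<arma::mat>(2, 4);  // 2 classes, 1 + 3 features

  arma::mat joined = arma::join_cols(arma::ones<arma::mat>(1, data.n_cols), data);
  arma::mat viaJoin = parameters * joined;
  arma::mat viaSplit = arma::repmat(parameters.col(0), 1, data.n_cols) +
      parameters.cols(1, parameters.n_cols - 1) * data;
  std::cout << "max difference: " << arma::abs(viaJoin - viaSplit).max()
            << std::endl;  // ~0
  return 0;
}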
void HMM::computeXiCached()
{
  arma::mat temp = B_.rows(1, T_-1) % beta_.cols(1, T_-1).t();
  for (unsigned int i = 0; i < N_; ++i)
  {
    xi_.slice(i) = temp % (alpha_(i, arma::span(0, T_-2)).t() * A_.row(i));
  }
}
void anderson::update(arma::vec & v, const arma::vec & f)
{
  using namespace arma;
  if (updates == 0) {
    v_old = v;
    f_old = f;
    v += f * beta;
  } else {
    // update the history matrices
    K = join_horiz(K, v - v_old);
    D = join_horiz(D, f - f_old);
    // cut the matrices to the desired maximum length
    if (D.n_cols > N) {
      K = K.cols(1, K.n_cols - 1);
      D = D.cols(1, D.n_cols - 1);
    }
    // temporary variable
    mat I = D.t() * D;
    // check if I's condition number is too large, i.e. I^-1 close to singular
    while ((cond(I) > 1e16) && (D.n_cols > 1)) {
      // in that case, cut older values until it works again
      K = K.cols(1, K.n_cols - 1);
      D = D.cols(1, D.n_cols - 1);
      I = D.t() * D;
    }
    v_old = v;
    f_old = f;
    // subtract the Anderson-mixing term
    v += f * beta - (K + D * beta) * arma::solve(I, D.t()) * f;
  }
  ++updates;
}
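// The members referenced above are not shown in this snippet; a minimal
// declaration consistent with update() might look like this (an assumption,
// not the project's actual header):
#include <armadillo>

struct anderson {
  arma::mat K, D;           // histories of iterate and residual differences
  arma::vec v_old, f_old;   // previous iterate and residual
  double beta = 0.5;        // simple-mixing weight
  arma::uword N = 5;        // maximum history length
  arma::uword updates = 0;  // number of update() calls so far

  void update(arma::vec& v, const arma::vec& f);
};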
arma::mat subspaceIdMoor::blkhank(arma::mat const &y, uword i, uword j)
{
  assert(y.n_rows < y.n_cols);
  uword ny = y.n_rows;
  uword N = y.n_cols;
  if (j > N - i + 1)
    cerr << "blkHank: j too big" << endl;
  mat H(ny*i, j);
  for (uword k = 0; k < i; k++)
    H.rows(k*ny, (k + 1)*ny - 1) = y.cols(k, k + j - 1);
  //H.save("H.dat", raw_ascii);
  return H;
}
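// A standalone toy illustration of the block Hankel structure built above:
// for ny = 1, i = 2, j = 3 and y = [0 1 2 3 4], row block k equals
// y.cols(k, k + j - 1). The loop below mirrors the member function.
#include <armadillo>

int main()
{
  arma::mat y = arma::regspace<arma::rowvec>(0, 4);  // [0 1 2 3 4], ny = 1
  arma::uword ny = y.n_rows, i = 2, j = 3;
  arma::mat H(ny*i, j);
  for (arma::uword k = 0; k < i; k++)
    H.rows(k*ny, (k + 1)*ny - 1) = y.cols(k, k + j - 1);
  H.print("H:");  // [ 0 1 2 ; 1 2 3 ]
  return 0;
}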
std::vector<GMM> operator() (const arma::mat & data, const arma::urowvec & labels)
{
  const unsigned int numLabels =
      (unsigned int) arma::as_scalar(arma::max(labels)) + 1;
  std::vector<GMM> BModels;
  for (unsigned int i = 0; i < numLabels; ++i) {
    arma::uvec indices = arma::find(labels == i);
    if (indices.n_elem > 0) {
      BModels.push_back(GMM(data.cols(indices), kmin_, kmax_));
    }
  }
  return BModels;
}
// [[Rcpp::export]]
arma::mat reFitUnivariate(arma::vec y, arma::mat design_mat, arma::mat beta,
                          int nlam, int J, int n)
{
  arma::uvec solo(1);
  // Initialize ans matrix to store re-fitted values.
  arma::mat ans_beta(J, nlam, fill::zeros);

  for (uword i = 0; i < (uword) nlam; i++) {
    arma::vec temp = beta.col(i);
    arma::uvec inds = find(temp);
    if (inds.n_elem > 0) {
      // Refit by ordinary least squares on the active set of column i.
      arma::vec temp_beta = solve(design_mat.cols(inds), y);
      solo(0) = i;
      ans_beta(inds, solo) = temp_beta;
    }
  }
  return ans_beta;
}
//' @title Sample FSV loads
//' @description Given the data, the factors and the idiosyncratic variances,
//' this function samples the loadings matrix with PLT prior constraints.
//' @param y data matrix which follows the FSV model.
//' @param factors matrix of factors \eqn{T \times k}.
//' @param psi idiosyncratic variances.
//' @param m0 prior mean of the loads.
//' @param C0 prior variance of the loads.
//' @return A matrix with the loads.
// [[Rcpp::export]]
arma::mat SampleFsvLoads(arma::mat y, arma::mat factors, arma::vec psi,
                         double m0, double C0)
{
  int k = factors.n_cols;
  int q = psi.n_rows;
  arma::mat Fi, Ci, invCi, rootInvCi, rootCi;
  arma::vec mi;

  // simulation of the constrained block of the matrix
  arma::mat Lambda_1(k, k);
  Lambda_1.eye();
  arma::mat FtF = factors.t() * factors;
  for (int ik = 1; ik < k; ik++) {
    Fi = factors.cols(0, ik-1);
    invCi = (1/psi(ik, 0)) * FtF.submat(0, 0, ik-1, ik-1) + (1/C0);
    rootInvCi = arma::chol(arma::symmatu(invCi));
    rootCi = arma::trans(arma::inv(arma::trimatu(rootInvCi)));
    mi = rootCi.t() * rootCi * ((1/psi(ik, 0)) * Fi.t() * y.col(ik) + (m0/C0));
    Lambda_1.submat(ik, 0, ik, ik-1) = mi.t() + randn(1, ik)*rootCi;
  }

  // simulation of the unconstrained block of the matrix
  arma::mat Lambda_2(q-k, k);
  Lambda_2.zeros();
  for (int ik = k; ik < q; ik++) {
    invCi = (1/psi(ik, 0)) * FtF + (1/C0);
    rootInvCi = arma::chol(arma::symmatu(invCi));
    rootCi = arma::trans(arma::inv(arma::trimatu(rootInvCi)));
    mi = rootCi.t() * rootCi * ((1/psi(ik, 0)) * factors.t() * y.col(ik) + (m0/C0));
    Lambda_2.row(ik-k) = mi.t() + randn(1, k)*rootCi;
  }

  arma::mat Lambda = join_cols(Lambda_1, Lambda_2);
  return Lambda;
}
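// A hedged toy check (illustrative names) of the Cholesky-of-precision trick
// used above: for upper-triangular R = chol(invC), L = inv(R).t() satisfies
// L.t() * L = inv(invC), so mi.t() + randn(1, k) * L has covariance Ci.
#include <armadillo>
#include <iostream>

int main()
{
  arma::mat invC = {{4.0, 1.0}, {1.0, 3.0}};  // a toy precision matrix
  arma::mat R = arma::chol(invC);             // invC = R.t() * R
  arma::mat L = arma::trans(arma::inv(arma::trimatu(R)));
  std::cout << arma::abs(L.t() * L - arma::inv(invC)).max() << std::endl;  // ~0
  return 0;
}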
// [[Rcpp::export]]
List real_cont(arma::mat coeffs_cont, arma::mat X, int n_exog, int n_endog,
               int n_cont, arma::rowvec rho, arma::rowvec sig_eps, int N,
               arma::rowvec upper, arma::rowvec lower, bool cheby, int seed=222)
{
  // Computes a matrix of realized next-period controls from a simulation

  int n_pts = X.n_rows;  // The number of points at which the error is assessed
  // Temporary containers used in the loop. Make cont bigger than size 0
  // here - just passing a useless empty container
  mat exog = zeros<rowvec>(n_exog);
  mat endog = zeros<rowvec>(n_endog);
  mat cont_sim = zeros(n_pts, n_cont);

  arma_rng::set_seed(seed);  // Set the seed
  // The random draws
  mat exog_sim = (ones(n_pts) * rho) % X.cols(0, n_exog - 1) +
                 (ones(n_pts) * sig_eps) % randn<mat>(n_pts, n_exog);

  /** Now compute the model errors **/
  for (int i = 0; i < n_pts; i++) {  // Loop over the evaluation points
    // The updated exogenous variable in the next period
    exog = exog_sim.row(i);
    // Select the current-period endogenous states
    endog = X.row(i).subvec(n_exog, n_exog + n_endog - 1);
    // The integral over realizations of the shock
    cont_sim.row(i) = endog_update(exog, endog, coeffs_cont, n_exog, n_endog,
                                   N, upper, lower, cheby);
  }

  // Create the output list
  List out;
  out["r.exog"] = exog_sim;
  out["r.cont"] = cont_sim;
  return out;
}
///
/// \brief Vespucci::Math::DimensionReduction::VCA
/// Vertex Component Analysis
/// \param R The dataset
/// \param p Number of endmembers to compute
/// \param indices Row indices of pure components.
/// \param endmember_spectra Spectra of pure components (note that these are in
/// columns, not rows as in spectra_)
/// \param projected_data Projected data
/// \param fractional_abundances Purity of a given spectrum relative to endmember
/// \return Convergence (no actual test implemented...)
///
bool Vespucci::Math::DimensionReduction::VCA(const arma::mat &R, arma::uword p,
                                             arma::uvec &indices,
                                             arma::mat &endmember_spectra,
                                             arma::mat &projected_data,
                                             arma::mat &fractional_abundances)
{
  //Initializations
  arma::uword L = R.n_rows;
  arma::uword N = R.n_cols;
  if (L == 0 || N == 0) {
    std::cerr << "No data!" << std::endl;
    return false;
  }

  if (p > L) {
    std::cerr << "wrong number of endmembers (" << p << ")!" << std::endl;
    std::cerr << "set to 5 or one less than number of spectra" << std::endl;
    p = (L < 5 ? L - 1 : 5);  // min(5, L - 1), so that p never exceeds L
  }

  //mat of SNR
  arma::mat r_m = mean(R, 1);
  arma::mat R_m = arma::repmat(r_m, 1, N); //the mean of each spectral band
  arma::mat R_o = R - R_m; //mean-center the data
  arma::mat Ud;
  arma::vec Sd;
  arma::mat Vd;
  //arma::svds(Ud, Sd, Vd, arma::sp_mat(R_o * R_o.t()/N), p);
  Vespucci::Math::DimensionReduction::svds(R_o*R_o.t()/N, p, Ud, Sd, Vd);
  arma::mat x_p;
  try {
    x_p = Ud.t() * R_o;
  } catch(std::exception &e) {
    std::cout << "Ud.t() * R_o" << std::endl;
  }

  double SNR = Vespucci::Math::DimensionReduction::estimate_snr(R, r_m, x_p);
  double SNR_th = 15 + 10*log10(p);

  //Choose projective projection or projection to p-1 subspace
  arma::mat y;
  if (SNR < SNR_th) {
    arma::uword d = p - 1;
    Ud = Ud.cols(0, d-1);
    projected_data = Ud * x_p.rows(0, d-1) + R_m; //in dimension L
    arma::mat x = x_p.rows(0, d-1); //x_p = trans(Ud)*R_o, p-dimensional subspace
    //the following three lines are one line in MATLAB...
    arma::mat sum_squares = sum(pow(x, 2));
    double c = sum_squares.max();
    c = std::sqrt(c);
    y = arma::join_vert(x, c*arma::ones(1, N));
  }
  else {
    arma::uword d = p;
    //R_o is a mean-centered version...
    Vespucci::Math::DimensionReduction::svds(R*R.t()/N, d, Ud, Sd, Vd);
    x_p = Ud.t() * R;
    projected_data = Ud * x_p.rows(0, d-1);
    arma::mat x = Ud.t() * R;
    arma::mat u = arma::mean(x, 1);
    y = x / arma::repmat(sum(x % arma::repmat(u, 1, N)), d, 1);
  }

  // The VCA algorithm
  arma::vec w;
  w.set_size(p);
  arma::vec f;
  arma::rowvec v;
  indices.set_size(p);
  //there are no fill functions for arma::uvecs
  for (arma::uword i = 0; i < p; ++i)
    indices(i) = 0;
  arma::mat A = arma::zeros(p, p);
  double v_max;
  double sum_squares;
  arma::uvec q1;
  A(p-1, 0) = 1;
  for (arma::uword i = 0; i < p; ++i) {
    w.randu();
    f = w - A*arma::pinv(A)*w;
    sum_squares = sqrt(sum(square(f)));
    f /= sum_squares;
    v = f.t() * y;
    v_max = arma::max(abs(v));
    q1 = arma::find(abs(v) == v_max, 1);
    indices(i) = q1(0);
    A.col(i) = y.col(indices(i)); //same as x.col(indices(i));
  }
  endmember_spectra = projected_data.cols(indices);
  fractional_abundances = arma::trans(pinv(endmember_spectra) * projected_data);
  return true;
}
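// A minimal hedged usage sketch (illustrative dimensions and names; assumes
// the Vespucci headers are available):
arma::mat R = arma::randu<arma::mat>(100, 500);  // 100 bands x 500 spectra
arma::uvec indices;
arma::mat endmembers, projected, abundances;
bool ok = Vespucci::Math::DimensionReduction::VCA(R, 3, indices, endmembers,
                                                  projected, abundances);
// endmembers then holds one endmember spectrum per column.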
void build_density(arma::mat& P, arma::mat& C, size_t NOcc)
{
  P = C.cols(0, NOcc-1) * C.cols(0, NOcc-1).t();
}
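// Usage sketch (illustrative dimensions; assumes the definition above is in
// scope): C holds orbital coefficients in columns, with the NOcc occupied
// orbitals first. Note that some closed-shell codes include an extra factor
// of 2 in this density.
int main()
{
  arma::mat C = arma::randu<arma::mat>(7, 7);
  arma::mat P;
  build_density(P, C, 5);  // P is 7 x 7, symmetric, with rank 5
  P.print("P:");
  return 0;
}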
// [[Rcpp::export]]
List glm_forward_c(arma::mat x,         // inputs (features)
                   Function pseudo_obs, // R-function returning the pseudo-data based on the quadratic approximation
                   double lambda,       // regularization parameter (multiplier for L2-penalty)
                   bool intercept,      // whether to use intercept
                   arma::vec penalty,   // relative penalties for the variables
                   double thresh,       // threshold for stopping the iteratively reweighted least squares
                   int qa_updates_max,  // max number of quadratic approximation updates
                   int pmax,            // maximum number of variables up to which the search is continued
                   arma::vec w0,        // initial guess for the weights of the pseudo-gaussian observations (needed for Student-t model)
                   int ls_iter_max=50)  // max number of line search iterations
{
  mat xp;       // x for the current active set
  mat xp_temp;  // x for the current active set + the variable to be added
  size_t D = x.n_cols;           // total number of inputs
  size_t pmaxu = (size_t) pmax;  // converting pmax to unsigned int (avoids some compiler warnings)

  uvec chosen(D); chosen.zeros();             // keeps track of added variables
  uvec varorder(pmaxu); varorder.zeros();     // stores the order in which the variables are added to the model
  mat beta_all(D, pmaxu); beta_all.zeros();   // collects beta from all steps
  rowvec beta0_all(pmaxu); beta0_all.zeros(); // collects beta0 from all steps
  mat w_all(x.n_rows, pmaxu);                 // collects weights of the gaussian pseudo-observations from all steps

  // declare a few variables that are needed during the iteration
  vec w = w0;
  int qau;
  size_t j, k, jopt = 0;
  uvec varind;
  uvec step(1);

  for (k = 0; k < pmaxu; ++k) {
    varind = find(chosen);
    vec beta(varind.size() + 1 + intercept);
    vec betaopt;
    double loss_min = std::numeric_limits<double>::infinity();
    double loss;

    // loop through all the candidate variables that could be added next
    for (j = 0; j < D; ++j) {
      if (chosen(j))
        continue;
      chosen(j) = 1;
      beta.zeros();
      glm_ridge(beta, loss, w, qau, x.cols(find(chosen)), pseudo_obs, lambda,
                intercept, penalty.elem(find(chosen)), thresh, qa_updates_max,
                ls_iter_max);
      chosen(j) = 0;

      if (loss < loss_min) {
        loss_min = loss;
        jopt = j;
        betaopt = beta;
      }
    }
    varorder(k) = jopt;
    chosen(jopt) = 1;

    step(0) = k;
    if (intercept) {
      beta0_all(k) = betaopt(0);
      beta_all.submat(find(chosen), step) = betaopt.tail(k+1);
    } else {
      beta0_all(k) = 0;
      beta_all.submat(find(chosen), step) = betaopt;
    }
    w_all.col(k) = w;
  }

  return List::create(beta_all, beta0_all, varorder, w_all);
}
ss subspaceIdMoor::subidKnownOrder(arma::uword ny, arma::uword nu,
                                   arma::mat const &R, arma::mat const &Usvd,
                                   arma::vec const &singval, arma::uword i,
                                   arma::uword n)
{
  ss ssout;
  mat U1 = Usvd.cols(0, n - 1);

  /* STEP 4 in Subspace Identification */
  /* Determine gam and gamm */
  mat gam = U1 * diagmat(sqrt(singval.subvec(0, n - 1)));
  mat gamm = gam.rows(0, ny*(i - 1) - 1);
  mat gam_inv = pinv(gam);   /* pseudo inverse */
  mat gamm_inv = pinv(gamm); /* pseudo inverse */

  /* STEP 5 */
  mat Rhs, Lhs;
  buildRhsLhsMatrix(gam_inv, gamm_inv, R, i, n, ny, nu, Rhs, Lhs);

  /* Solve least squares */
  mat solls;
  solls = solve(Rhs.t(), Lhs.t()).t();

  /* Extract system matrices: */
  mat A, C;
  A = solls.submat(0, 0, n - 1, n - 1);
  C = solls.submat(n, 0, n + ny - 1, n - 1);
  mat res = Lhs - solls*Rhs;

  /* Recompute gamma from A and C: */
  gam.zeros();
  gam.rows(0, ny - 1) = C;
  for (uword k = 2; k <= i; k++) {
    gam.rows((k - 1)*ny, k*ny - 1) = gam.rows((k - 2)*ny, (k - 1)*ny - 1) * A;
  }
  gamm = gam.rows(0, ny*(i - 1) - 1);
  gam_inv = pinv(gam);
  gamm_inv = pinv(gamm);

  /* Recompute the states with the new gamma: */
  buildRhsLhsMatrix(gam_inv, gamm_inv, R, i, n, ny, nu, Rhs, Lhs);

  /* STEP 6: */
  /* Computing system matrices B and D */
  /* ref page 125 for P and Q */
  mat P = Lhs - join_vert(A, C) * Rhs.rows(0, n - 1);
  P = P.cols(0, 2*nu*i - 1);
  mat Q = R.submat(nu*i, 0, 2*nu*i - 1, 2*nu*i - 1); /* Future inputs */

  /* Matrices L1, L2 and M as on page 119 */
  mat L1 = A * gam_inv;
  mat L2 = C * gam_inv;
  mat M = join_horiz(zeros(n, ny), gamm_inv);
  mat X = join_vert(join_horiz(eye(ny, ny), zeros(ny, n)),
                    join_horiz(zeros(ny*(i-1), ny), gamm));

  /* Calculate N and the Kronecker products (page 126) */
  mat N;
  uword kk = 1;
  buildNMatrix(kk, M, L1, L2, X, i, n, ny, N);
  mat totm = kron(Q.rows((kk - 1)*nu, kk*nu - 1).t(), N);
  for (kk = 2; kk <= i; kk++) {
    buildNMatrix(kk, M, L1, L2, X, i, n, ny, N);
    totm = totm + kron(Q.rows((kk - 1)*nu, kk*nu - 1).t(), N);
  }

  /* Solve least squares: */
  mat Pvec = vectorise(P);
  mat sollsq2 = solve(totm, Pvec);

  /* Mount B and D */
  sollsq2.reshape(n + ny, nu);
  mat D = sollsq2.rows(0, ny - 1);
  mat B = sollsq2.rows(ny, ny + n - 1);

  /* STEP 7: Compute system matrices G, L0 */
  mat covv, Qs, Ss, Rs, sig, G, L0, K, Ro;
  if (norm(res) > 1e-10) {
    /* Determine Q, S and R from the residuals */
    covv = res*res.t();
    Qs = covv.submat(0, 0, n - 1, n - 1);
    Ss = covv.submat(0, n, n - 1, n + ny - 1);
    Rs = covv.submat(n, n, n + ny - 1, n + ny - 1);
    simple_dlyap(A, Qs, sig); /* solves the discrete Lyapunov matrix equation */
    G = A*sig*C.t() + Ss;
    L0 = C*sig*C.t() + Rs;

    /* Determine K and Ro */
    g12kr(A, G, C, L0, K, Ro);
  }

  /* Set the ss structure: */
  ssout.A = A;
  ssout.B = B;
  ssout.C = C;
  ssout.D = D;
  //ssout.A = A; stopped here -> add later the ones related to the stochastic part to ss.
  return ssout;
}
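// A hedged standalone sketch of the identity behind the Kronecker/vectorise
// least-squares step above: vec(A * X * B) == kron(B.t(), A) * vec(X), which
// is what lets the code solve for vec([D; B]) from the stacked N_k and Q_k
// terms. Toy check with illustrative names:
#include <armadillo>
#include <iostream>

int main()
{
  arma::mat A = arma::randu<arma::mat>(3, 4);
  arma::mat X = arma::randu<arma::mat>(4, 2);
  arma::mat B = arma::randu<arma::mat>(2, 5);
  arma::vec lhs = arma::vectorise(A * X * B);
  arma::vec rhs = arma::kron(B.t(), A) * arma::vectorise(X);
  std::cout << arma::abs(lhs - rhs).max() << std::endl;  // ~0
  return 0;
}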
std::vector<int> HighOrderMeshGenerator::
insertNewNodes(const unique_element_ptr& el, const arma::mat& newelnodes,
               const arma::mat& parametric_coords, int order)
{
  const int type = el->getElementType();
  const NodeIndexer* ni_old = index_factory->getNodeIndexer(type, el->getOrder());
  const NodeIndexer* ni_new = index_factory->getNodeIndexer(type, order);

  std::vector<int> newnode_indices(ni_new->Ndof(), -1);

  const std::vector<indtype>& corner_new = ni_new->getCornerNodes();
  const gind* old_nodes = el->getNodes();
  const std::vector<indtype>& corner_old = ni_old->getCornerNodes();

  for (int i = 0; i < el->numCornerNodes(); i++) {
    //std::cout << "corner node: " << corner_new[i] << " "
    //          << corner_old[i] << std::endl;
    newnode_indices[corner_new[i]] = old_nodes[corner_old[i]];
  }

  for (int ch = 0; ch < el->NumChildren(); ch++) {
    const MEl* child_old = el->getChild(ch);
    if (el->getDim() > 1)
      assert(child_old);

    if (child_old) {
      //int child_dim = child_old->getDim();
      auto child_it = elmap[el->getDim()-1].find(child_old);
      assert(child_it != elmap[el->getDim()-1].end());
      const MEl* child_new = child_it->second;
      const gind* child_nodes = child_new->getNodes();
      int orient = el->getChildOrientation(ch);
      const std::vector<indtype>& child_node_indices = ni_new->getChildNodes(ch);

      const NodeIndexer* ni_child =
          index_factory->getNodeIndexer(child_new->getElementType(),
                                        child_new->getOrder());
      const std::vector<indtype>& orient_indices =
          ni_child->getOrientedNodes(orient);

      assert(child_node_indices.size() == child_new->NumNodes());

      for (int nd = 0; nd < child_new->NumNodes(); nd++) {
        newnode_indices[child_node_indices[nd]] = child_nodes[orient_indices[nd]];
      }
    }
  } // end child loop

  // delete the old interior nodes from the map
  const std::vector<indtype>& old_interior_indices = ni_old->getInteriorNodes();
  for (unsigned int i = 0; i < old_interior_indices.size(); i++) {
    meshcurr->getNodesNC().erase(old_nodes[old_interior_indices[i]]);
  }

  // insert new nodes into the node map
  arma::uvec new_interior_indices(ni_new->getInteriorNodes());
  arma::mat new_interior_nodes = newelnodes.cols(new_interior_indices);

  arma::mat new_interior_parametric_coords;
  if (!parametric_coords.is_empty()) {
    new_interior_parametric_coords = parametric_coords.cols(new_interior_indices);
  }

  nodeFactory* node_factory = nodeFactory::Instance();

  int node_type = 3;
  if (el->hasGeoEntity())
    node_type = el->getGeoType()+1;

  //if (el->getElementType() == 1 && !el->hasGeoEntity())
  //  std::cout << "Line does not have a geo entity!" << std::endl;

  for (unsigned int i = 0; i < new_interior_indices.size(); i++) {
    double uv[2] = {0.0, 0.0};
    for (unsigned int j = 0; j < parametric_coords.n_rows; j++) {
      uv[j] = new_interior_parametric_coords.at(j, i);
    }
    int nd_count = node_factory->GetNodeCount();
    newnode_indices[new_interior_indices[i]] = nd_count;
    auto newnode = node_factory->CreateNode(node_type,
                                            new_interior_nodes.colptr(i),
                                            el->getGeoEntity(),
                                            uv[0], uv[1]);
    meshcurr->getNodesNC()[nd_count] = std::move(newnode);
  }

  return newnode_indices;
}
tuple<double, double, int, int, double, double> simulate(
    const arma::Col<double> &Y, const vector<int> X, double sigma,
    bool varianceKnown, arma::mat &Z, mt19937_64 &rng, bool interceptTerm)
{
  bernoulli_distribution bernoulli(0.5);
  int N = X.size();
  Z.fill(0);

  // bestColumns[k] keeps track of the k + 1 or k + 2 columns that produce the
  // smallest p-value, depending on interceptTerm
  vector<arma::uvec> bestColumns;
  bestColumns.reserve(N - 1);
  if (interceptTerm) {
    // make the intercept term the last column of Z
    fill(Z.begin_col(N - 1), Z.end_col(N - 1), 1);
    copy(X.begin(), X.end(), Z.begin_col(0));
    bestColumns.push_back(arma::uvec{0, (unsigned long long) N - 1ULL});
  } else {
    copy(X.begin(), X.end(), Z.begin_col(0));
    bestColumns.push_back(arma::uvec{0});
  }

  // bestPValues[k] corresponds to the p-value if the columns bestColumns[k] are used
  vector<pair<double, double>> bestPValues;
  bestPValues.reserve(N - 1);
  bestPValues.push_back(calculateBetaPValue(Z.cols(bestColumns.front()), Y,
                                            sigma, varianceKnown));

  if (bestPValues.front().first <= 0.05) {
    return make_tuple(bestPValues.front().first, bestPValues.front().second,
                      0, 0, -1, bestPValues.front().first);
  } else { // need more covariates
    bool done = false;
    int smallestSubsetSize = INT_MAX;
    /* add covariates one-by-one; we always include the treatment, and
     * if we're using the intercept two covariates are included by default */
    for (int j = 1; j < N - 2 || (j == N - 2 && !interceptTerm); ++j) {
      for (int k = 0; k < N; ++k) Z(k, j) = bernoulli(rng);
      if (!interceptTerm) {
        while (arma::rank(Z) <= j) {
          for (int k = 0; k < N; ++k) Z(k, j) = bernoulli(rng);
        }
      } else { // offset rank by 1 for the intercept term
        while (arma::rank(Z) <= j + 1) {
          for (int k = 0; k < N; ++k) Z(k, j) = bernoulli(rng);
        }
      }

      // loop through subset sizes; k is the number of additional covariates
      for (int k = j; k >= 1; --k) {
        pair<double, double> newPValue;
        if (k == j) { // use all available covariates
          // add one more to the biggest subset
          bestColumns.emplace_back(bestColumns.back().n_rows + 1);
          for (unsigned int l = 0; l < bestColumns.back().n_rows - 1; ++l) {
            bestColumns.back()(l) = bestColumns[j - 1](l); // copy over from the original subset
          }
          bestColumns.back()(bestColumns.back().n_rows - 1) = j; // add the new covariate
          newPValue = calculateBetaPValue(Z.cols(bestColumns.back()), Y, sigma,
                                          varianceKnown);
          bestPValues.push_back(newPValue);
        } else { // make a new subset of the same size with the new covariate
          arma::uvec columnSubset(bestColumns[k].n_rows);
          for (unsigned int l = 0; l < columnSubset.n_rows - 1; ++l)
            columnSubset(l) = bestColumns[k - 1](l); // copy over from the smaller subset
          columnSubset(columnSubset.n_rows - 1) = j; // add the new covariate
          newPValue = calculateBetaPValue(Z.cols(columnSubset), Y, sigma,
                                          varianceKnown);
          if (bestPValues[k].first > newPValue.first) { // if better, replace the subset
            bestPValues[k] = newPValue;
            bestColumns[k] = columnSubset;
          }
        }
        if (newPValue.first <= 0.05) { // stop when we reach significance
          done = true;
          smallestSubsetSize = k;
        }
      }

      if (done) {
        // compute the balance p-value in the special case that only 1 covariate was needed
        double balancePValue = -1;
        if (smallestSubsetSize == 1 && !interceptTerm) {
          balancePValue = testBalance(Z.col(bestColumns[1](1)), Z.col(0));
        } else if (smallestSubsetSize == 1 && interceptTerm) {
          balancePValue = testBalance(Z.col(bestColumns[1](2)), Z.col(0));
        }
        return make_tuple(bestPValues.front().first,
                          bestPValues[smallestSubsetSize].second, j,
                          smallestSubsetSize, balancePValue,
                          bestPValues[smallestSubsetSize].first);
      }
    }
  }
  return make_tuple(bestPValues.front().first, bestPValues.front().second,
                    -1, -1, -1, bestPValues.front().first);
}