arma::vec cpp_UpdateBeta (arma::mat x_small, arma::vec weights_small, arma::vec y_small, arma::mat x_big, arma::vec weights_big, arma::vec y_big, arma::vec gamma) { // This is a simple helper function used within the solver for // weighted linear regression with an l1 penalty. // // Args: // gamma: The current estimate of gamma, used to update beta. // ...: For all other arguments see cpp_SolveL1Linear. // Returns: // beta: The update of the parameter vector beta. // Obtain the matrix W * X where W is the diagonal weight matrix arma::mat weighted_X_small(x_small.begin(), x_small.n_rows, x_small.n_cols, true); arma::mat weighted_X_big(x_big.begin(), x_big.n_rows, x_big.n_cols, true); weighted_X_small.each_col() %= weights_small; weighted_X_big.each_col() %= weights_big; // Update beta by a simple least squares like update. arma::vec beta; beta = inv(x_small.t() * weighted_X_small + x_big.t() * weighted_X_big) * (weighted_X_small.t() * y_small + weighted_X_big.t() * y_big - x_big.t() * weighted_X_big * gamma); return beta; }
// this function sample the loading parameters considering constant // observational variance. // [[Rcpp::export]] arma::mat SampleDfmLoads(arma::mat y, arma::mat factors, arma::vec psi, int s, double c0) { /** * The data matrix 'y' is (T \times q). * Note that the factors matrix is (T+h \times k). In the models considered * in the dissertation, h > s. This means that we always have the * VAR order, 'h', greater than the number of dynamic factors, 's', in the * observational equation. * The argument 's' is the order of the dynamic factors present in the * observational equation. * The prior mean is set to zero. */ int T = y.n_rows; int q = y.n_cols;; int k = factors.n_cols; int TpH = factors.n_rows; int h = TpH - T; if (h < s+1){ throw std::range_error( "Argument 's' greater than or equal to factors' VAR order in SampleDfmLoads.cpp" ); } arma::mat I_q = eye(q, q); // std::cout << "\nh = " << h << "\n"; arma::mat Xlambda; Xlambda = factors.rows(h, TpH-1); // std::cout << "\nNumber of rows in Xlambda is " << Xlambda.n_rows << "\n"; if (s > 0){ for (int i = 1; i <= s; i++){ Xlambda = arma::join_rows(Xlambda, factors.rows(h-i, TpH-i-1)); } } // posterior variance arma::mat Eigvec; arma::vec eigval; arma::eig_sym(eigval, Eigvec, arma::symmatu(Xlambda.t() * Xlambda)); arma::mat kronEig = kron(I_q, Eigvec); arma::vec L1eigval = 1.0/(arma::kron((1.0/psi), eigval) + 1/c0); arma::mat L1 = kronEig * diagmat(L1eigval) * kronEig.t(); // posterior mean arma::vec yStar(y.begin(), T*q); arma::vec l1 = L1 * (arma::kron(arma::diagmat(1.0/psi), Xlambda.t()) * yStar); arma::mat l1Mat(l1.begin(), k*(s+1), q); // mean in matrix form // random generation arma::vec vecnorm = sqrt(L1eigval) % randn(q*k*(s+1), 1); arma::mat LambdaBar(vecnorm.begin(), k*(s+1), q, false); LambdaBar = l1Mat + Eigvec * LambdaBar; return LambdaBar.t(); }
//[[Rcpp::export]] IntegerVector whichNA(arma::mat X) { NumericVector x(X.begin(), X.end()); int n = x.size(); IntegerVector y = seq_len(n);//(x.begin(), x.end()); IntegerVector z = y[is_na(x)]; if(z.size()==0) std::cout << "\nNenhum valor faltante.\n"; return z; }
/** \brief Extracts the data and passes it to the kmeans algorithm * \param[in] model An ESBTL Protein model * \param[in] An armadillo matrix, will get overwritten * \return A labeling vector * */ arma::urowvec operator() (const ESBTL::Default_system::Model& model, arma::mat & data) { data = arma::mat(3, (unsigned int) std::distance(model.atoms_begin(), model.atoms_end())); auto mat_it = data.begin(); for (ESBTL::Default_system::Model::Atoms_const_iterator it_atm=model.atoms_begin();it_atm!=model.atoms_end();++it_atm){ *(mat_it++) = it_atm->x(); *(mat_it++) = it_atm->y(); *(mat_it++) = it_atm->z(); } data = transformation_(data); arma::urowvec labels = kmeans(data, params_); return labels; }
// [[Rcpp::export]] arma::mat cpp_GenerateWeightMat(double lambda, arma::mat x_small, arma::mat x_big, arma::mat x_test, arma::vec mu_small, arma::vec mu_big) { // This function is the c++ implementation of the function // GenerateWrightMat. For details see helper_functions.R // // Args: // Note: The arguments remain unchanged. The notation is slightly changed // because in C++ we cannot name objects with '.' and hence is replaced by // '_' for object names. int N_small = x_small.n_rows; int N_big = x_big.n_rows; // int N_test = x_test.n_rows; int p = x_small.n_cols; arma::mat cov_small(x_small.begin(), N_small, p, true); cov_small.each_col() %= mu_small % (1 - mu_small); cov_small = trans(x_small) * cov_small; arma::mat cov_big(x_big.begin(), N_big, p, true); cov_big.each_col() %= mu_big % (1 - mu_big); cov_big = trans(x_big) * cov_big; arma::mat cov_test = trans(x_test) * x_test; arma::mat temp_matrix = cov_test * inv_sympd(cov_big) * cov_small; arma::mat weight_matrix = cov_small + lambda * (cov_test + temp_matrix); weight_matrix = inv(weight_matrix) * (cov_small + lambda * temp_matrix); // In case we would like these quantities. // arma::mat cov_beta_small = inv(cov_small); // arma::mat cov_beta_lambda = inv(cov_small + cov_big - // cov_big * inv_sympd(cov_test) * cov_big/lambda); return weight_matrix; }
arma::vec cpp_UpdateGamma (arma::mat x_big, arma::vec weights_big, arma::vec y_big, arma::vec gamma, arma::vec beta, double lambda) { // Another helper function used by the main algorithm. // In this algorithm we loop through and do a coordinate wise update of // gamma. // // Agrs: // beta/gamma: The 'current' parameter vectors. // x/weights/y_big: Design, weights and response for the big population. // lambda: The value of the penalty parameter. // Returns: // gamma: The updated gamma vector. // Obtain value of dimension. int p = gamma.size(); // Initialize matrix X * beta. arma::mat x_beta = x_big * beta; // We need the diagonal entries of X^T * W * X. // This is easy when W is a diagonal matrix. arma::mat W_times_X(x_big.begin(), x_big.n_rows, x_big.n_cols, true); W_times_X.each_col() %= weights_big; // We obtain the diagonal entries of X^T * W * X for the denominators. arma::mat Xt_W_X = x_big.t() * W_times_X; arma::vec denoms = Xt_W_X.diag(); // Some temporary vectors which we use later. arma::vec temp_gamma(gamma.begin(), gamma.size(), true); double temp1; // Loop through the different gamma values which need to be updated. for(int i = 0; i < p; i++) { // Instead of excluding a column we simply set gamma_j = 0 to get the fitted // value without the effect of gamma_j. temp_gamma(i) = 0; temp1 = as_scalar(x_big.col(i).t() * (weights_big % (y_big - x_beta - x_big * temp_gamma))); gamma(i) = (cpp_sign(temp1) * cpp_max(abs(temp1) - lambda, 0.0))/denoms(i); temp_gamma(i) = gamma(i); } return gamma; }
//[[Rcpp::export]] Rcpp::List changeNA(arma::mat X) { NumericVector x(X.begin(), X.end()); int n = x.size(); IntegerVector y = seq_len(n)-1;//(x.begin(), x.end()); IntegerVector z = y[is_na(x)]; if(z.size()==0){ std::cout << "Nenhum valor faltante.\n"; SEXP xNULL = R_NilValue; return Rcpp::List::create(xNULL); } else{ arma::uvec Xna = as<arma::uvec>(z); NumericVector bx(Xna.n_rows, 100.0); arma::vec b = as<arma::vec>(bx); X.elem(Xna) = b; return Rcpp::List::create(X); } }
/** \brief Extracts the data and passes it to the kmeans algorithm * \param[in] model An ESBTL Protein model * \param[in] An armadillo matrix, will get overwritten * \return A labeling vector */ arma::urowvec operator() (const ESBTL::Default_system::Model& model, arma::mat & data) { data = arma::mat(3, (unsigned int) std::distance(model.atoms_begin(), model.atoms_end())); arma::urowvec labels = arma::urowvec(data.n_cols); unsigned int * label_it = labels.begin(); unsigned int label = 0; auto mat_it = data.begin(); for (ESBTL::Default_system::Model::Chains_const_iterator it_cha=model.chains_begin();it_cha!=model.chains_end();++it_cha){ for (ESBTL::Default_system::Model::Chain::Atoms_const_iterator it_atm = it_cha->atoms_begin(); it_atm != it_cha->atoms_end(); ++it_atm) { *(mat_it++) = it_atm->x(); *(mat_it++) = it_atm->y(); *(mat_it++) = it_atm->z(); *(label_it++) = label; } ++label; } return labels; }
//[[Rcpp::export]] arma::vec cpp_SolveLogistic(arma::mat X, arma::vec y, arma::vec initial_beta, int max_iter = 100, double tol = 1e-5) { // The C++ implementation of the function SolveLogistic. // One main difference is that I cannot figure out how to pass null values // to c++. In this case an initial estimate of beta is required. // Initialize some vectors arma::vec beta(initial_beta.begin(), initial_beta.size(), false); arma::vec beta_new = beta; arma::vec mu_current = cpp_GetFittedValues(X, beta); // Begin main loop. for (int i = 1; i <= max_iter; i++) { // Set current vector of fitted values. mu_current = cpp_GetFittedValues(X, beta); // Evaluate V * X where V is the diagonal variance matrix. arma::mat var_times_X(X.begin(), X.n_rows, X.n_cols, true); var_times_X.each_col() %= mu_current % (1 - mu_current); // Update beta. beta_new = beta + inv(trans(X) * var_times_X) * (trans(X) * (y - mu_current)); // Check for convergence. if (sum(pow(beta_new - beta, 2)) < tol) { return beta_new; } else { beta = beta_new; } } // Throw warning if it did not converge. Function warning("warning"); warning("The newton-raphson algorithm did not converge."); // Return the non-converged beta vector. return beta_new; }
////> Main function //// // [[Rcpp::export]] List calcPotts_cpp (const S4& W_SR, const S4& W_LR, arma::mat sample, NumericVector rho, const arma::mat& coords, const IntegerVector& site_order, int iter_max, double cv_criterion, bool test_regional, NumericVector distance_ref, double threshold, double neutre, int nbGroup_min, bool multiV, bool last_vs_others, bool prior, bool type_reg, int verbose){ //// preparation int p = sample.n_cols; int n = sample.n_rows; IntegerVector W_SRi = W_SR.slot("i"); IntegerVector W_SRp = W_SR.slot("p"); NumericVector W_SRx = W_SR.slot("x"); arma::mat Wpred(n, p); vector < vector < double > > pred_global(p); for(int iter_p = 0 ; iter_p < p ; iter_p ++){ pred_global[iter_p].resize(n); for(int iter_px = 0 ; iter_px < n ; iter_px++){ pred_global[iter_p][iter_px] = sample(iter_px, iter_p); } } vector < vector < double > > pred_global_hist = pred_global; if(prior == false){ std::fill(sample.begin(), sample.end(), 1); } arma::mat V(n, p); IntegerVector rang; bool no_site_order = (site_order[0] < 0); if(no_site_order == false){ rang = site_order; } int index_px; double norm; double val_criterion = cv_criterion + 1; double diff_criterion = 0, diff; double cv_criterion2 = n * cv_criterion ; int iter_updateV = 0 ; vector < double > res_multipotentiel(n) ; int iter = 0 ; //// estimation while(iter < iter_max && ( (val_criterion > cv_criterion) || (test_regional == true && iter_updateV != iter) ) ){ iter++; pred_global_hist = pred_global; if(no_site_order){ rang = rank_hpp(runif(n)) - 1; // tirer aleatoirement l ordre des sites } //// regional potential if(test_regional == true){ if(iter == 1 || val_criterion <= cv_criterion || diff_criterion > cv_criterion2){ iter_updateV = iter ; for(int iter_p = (p - 1) ; iter_p>= 0 ; iter_p --){ if(last_vs_others == false || iter_p == (p - 1)){ res_multipotentiel = calcMultiPotential_hpp(W_SR, W_LR, pred_global[iter_p], threshold, coords, Rcpp::as < std::vector < double > >(distance_ref), nbGroup_min, multiV, neutre)[0]; for(int iter_px = 0 ; iter_px < n ; iter_px++){ V(iter_px, iter_p) = res_multipotentiel[iter_px]; if(type_reg){ V(iter_px, iter_p) = V(iter_px, iter_p) * (V(iter_px, iter_p) - pred_global[iter_p][iter_px]); } } }else{ V.col(iter_p) = 1 - V.col(p - 1); } } }else{ cv_criterion2 /= 1.1; } } //// update site probabilities val_criterion = 0; diff_criterion = 0; for(int iter_px = 0 ; iter_px < n ; iter_px++){ // pour chaque pixel norm = 0.0; index_px = rang(iter_px); for(int iter_p = 0 ; iter_p < p ; iter_p++){ // pour chaque groupe Wpred(index_px, iter_p) = 0.0; for(int iter_vois = W_SRp[index_px]; iter_vois < W_SRp[index_px + 1]; iter_vois++){ // pour chaque voisin if(type_reg){ Wpred(index_px, iter_p) += W_SRx[iter_vois]*pred_global[iter_p][W_SRi[iter_vois]]*max(0.0, pred_global[iter_p][W_SRi[iter_vois]]-sample(index_px, iter_p)); }else{ Wpred(index_px, iter_p) += W_SRx[iter_vois] *pred_global[iter_p][W_SRi[iter_vois]]; } } // exponentielle rho if(test_regional == false){ pred_global[iter_p][index_px] = sample(index_px, iter_p) *exp(rho(0) *Wpred(index_px, iter_p)) ; }else{ pred_global[iter_p][index_px] = sample(index_px, iter_p) *exp(rho(0) *Wpred(index_px, iter_p) + rho(1) *V(index_px, iter_p)); } norm += pred_global[iter_p][index_px]; } // normalisation for(int iter_p = 0 ; iter_p < p ; iter_p++){ pred_global[iter_p][index_px] /= norm; diff = abs(pred_global_hist[iter_p][index_px]-pred_global[iter_p][index_px]); diff_criterion += diff; val_criterion = max(val_criterion, diff); } } if(verbose > 0){ if(verbose == 1){Rcout << "*" ;} if(verbose == 2){Rcout << "iteration " << iter << " : totaldiff = " << diff_criterion << " | maxdiff = " << val_criterion << endl;} } } if(verbose == 1){Rcout << endl;} //// export arma::mat spatialPrior(n, p); for(int iter_px = 0 ; iter_px < n ; iter_px++){ index_px = rang(iter_px); for(int iter_p = 0 ; iter_p < p ; iter_p++){ if(test_regional == false){ spatialPrior(index_px, iter_p) = exp(rho(0) *Wpred(index_px, iter_p)) ; }else{ spatialPrior(index_px, iter_p) = exp(rho(0) *Wpred(index_px, iter_p) + rho(1) *V(index_px, iter_p)); } } } return Rcpp::List::create(Rcpp::Named("predicted") = pred_global, Rcpp::Named("spatialPrior") = spatialPrior, Rcpp::Named("cv") = val_criterion <= cv_criterion, Rcpp::Named("iter_max") = iter ); }
//[[Rcpp::export]] arma::vec cpp_SolvePenaltyL2One (arma::mat x_full, arma::mat x_fancy, arma::vec y_full, arma::mat x_test, arma::vec pars_full, double lambda, int max_iter = 100, double tol = 1e-5) { // The cpp version of the penalized regression solver. // In this case we use a slightly different notation. // Here also we solve for only a single value of lambda. // Args: // x_full: The top four blocks of the matrix fancy X in notes/GLM.pdf. // x_fancy: The fancy X matrix in notes/GLM.pdf. // x_test: The test set design matrix. // y_full: The non-zero part of the fancy y vector of notes/GLM.pdf. // pars_full: The parameter vector (beta, gamma). // ...: The other parameters remain unchanged. // // Returns: // The full parameter vector (beta, gamma). // Initialize p. arma::uword p = x_test.n_cols; // Initialize vectors for algorithm. arma::vec pars_full_new; arma::vec mu_full; arma::vec mu_full_new; // Begin main loop for (int i = 1; i <= max_iter; i++) { // Get current mean vector mu_full = 1/(1 + exp(-x_full * pars_full)); // Create the y fancy vector. arma::vec y_fancy = join_vert(y_full - mu_full, -lambda * x_test * pars_full.tail(p)); // Create the diagonal of the fancy V matrix. arma::vec v_fancy = join_vert(mu_full % (1 - mu_full), lambda * ones<vec>(x_test.n_rows)); // As before, we first obtain the V_fancy * X_fancy. arma::mat var_times_X(x_fancy.begin(), x_fancy.n_rows, x_fancy.n_cols, true); var_times_X.each_col() %= v_fancy; // Update the parameter vector. pars_full_new = pars_full + solve(trans(x_fancy) * var_times_X, trans(x_fancy) * y_fancy); // Check for convergence. if (sum(pow(pars_full_new - pars_full, 2)) < tol) { return pars_full_new; } else { pars_full = pars_full_new; } } // Throw warning if it did not converge. Function warning("warning"); warning("The newton-raphson algorithm did not converge."); // Return the non-converged beta vector. return pars_full_new; }
arma::vec mat2vec(arma::mat y) { arma::vec b(y.begin(), y.size(), /*copy_aux_mem*/false, /*strict*/true); return b; }