// [[Rcpp::export]] SEXP FilterIndepOld (Rcpp::NumericVector y_rcpp, Rcpp::NumericMatrix y_lagged_rcpp, Rcpp::NumericMatrix z_dependent_rcpp, Rcpp::NumericMatrix z_independent_rcpp, Rcpp::NumericVector beta_rcpp, Rcpp::NumericVector mu_rcpp, Rcpp::NumericVector sigma_rcpp, Rcpp::NumericMatrix gamma_dependent_rcpp, Rcpp::NumericVector gamma_independent_rcpp, Rcpp::NumericMatrix transition_probs_rcpp, Rcpp::NumericVector initial_dist_rcpp ) { int n = y_rcpp.size(); int M = mu_rcpp.size(); arma::mat xi_k_t(M, n); // make a transpose first for easier column operations. arma::colvec y(y_rcpp.begin(), y_rcpp.size(), false); arma::mat y_lagged(y_lagged_rcpp.begin(), y_lagged_rcpp.nrow(), y_lagged_rcpp.ncol(), false); arma::mat z_dependent(z_dependent_rcpp.begin(), z_dependent_rcpp.nrow(), z_dependent_rcpp.ncol(), false); arma::mat z_independent(z_independent_rcpp.begin(), z_independent_rcpp.nrow(), z_independent_rcpp.ncol(), false); arma::colvec beta(beta_rcpp.begin(), beta_rcpp.size(), false); arma::colvec mu(mu_rcpp.begin(), mu_rcpp.size(), false); arma::colvec sigma(sigma_rcpp.begin(), sigma_rcpp.size(), false); arma::mat gamma_dependent(gamma_dependent_rcpp.begin(), gamma_dependent_rcpp.nrow(), gamma_dependent_rcpp.ncol(), false); arma::colvec gamma_independent(gamma_independent_rcpp.begin(), gamma_independent_rcpp.size(), false); arma::mat transition_probs(transition_probs_rcpp.begin(), transition_probs_rcpp.nrow(), transition_probs_rcpp.ncol(), false); arma::colvec initial_dist(initial_dist_rcpp.begin(), initial_dist_rcpp.size(), false); double likelihood = 0; SEXP eta_rcpp = EtaIndep(y_rcpp, y_lagged_rcpp, z_dependent_rcpp, z_independent_rcpp, beta_rcpp, mu_rcpp, sigma_rcpp, gamma_dependent_rcpp, gamma_independent_rcpp); arma::mat eta_t = (Rcpp::as<arma::mat>(eta_rcpp)).t(); xi_k_t.col(0) = eta_t.col(0) % initial_dist; double total = sum(xi_k_t.col(0)); xi_k_t.col(0) = xi_k_t.col(0) / total; likelihood += log(total); for (int k = 1; k < n; k++) { xi_k_t.col(k) = eta_t.col(k) % (transition_probs * xi_k_t.col(k-1)); total = sum(xi_k_t.col(k)); xi_k_t.col(k) = xi_k_t.col(k) / total; likelihood += log(total); } return Rcpp::List::create(Named("xi.k") = wrap(xi_k_t.t()), Named("likelihood") = wrap(likelihood)); }
double sPredModule::solveCoef(double wrss) { Rcpp::NumericMatrix cc = d_fac.solve(CHOLMOD_L, d_fac.solve(CHOLMOD_P, d_Vtr)); double ans = sqrt(inner_product(cc.begin(), cc.end(), cc.begin(), double())/wrss); Rcpp::NumericMatrix bb = d_fac.solve(CHOLMOD_Pt, d_fac.solve(CHOLMOD_Lt, cc)); copy(bb.begin(), bb.end(), d_coef.begin()); return ans; }
Rcpp::List RadiusSearch(Rcpp::NumericMatrix query_, Rcpp::NumericMatrix ref_, double radius, int max_neighbour, std::string build, int cores, int checks) { const std::size_t n_dim = query_.ncol(); const std::size_t n_query = query_.nrow(); const std::size_t n_ref = ref_.nrow(); // Column major to row major arma::mat query(n_dim, n_query); { arma::mat temp_q(query_.begin(), n_query, n_dim, false); query = arma::trans(temp_q); } flann::Matrix<double> q_flann(query.memptr(), n_query, n_dim); arma::mat ref(n_dim, n_ref); { arma::mat temp_r(ref_.begin(), n_ref, n_dim, false); ref = arma::trans(temp_r); } flann::Matrix<double> ref_flann(ref.memptr(), n_ref, n_dim); // Setting the flann index params flann::IndexParams params; if (build == "kdtree") { params = flann::KDTreeSingleIndexParams(1); } else if (build == "kmeans") { params = flann::KMeansIndexParams(2, 10, flann::FLANN_CENTERS_RANDOM, 0.2); } else if (build == "linear") { params = flann::LinearIndexParams(); } // Perform the radius search flann::Index<flann::L2<double> > index(ref_flann, params); index.buildIndex(); std::vector< std::vector<int> > indices_flann(n_query, std::vector<int>(max_neighbour)); std::vector< std::vector<double> > dists_flann(n_query, std::vector<double>(max_neighbour)); flann::SearchParams search_params; search_params.cores = cores; search_params.checks = checks; search_params.max_neighbors = max_neighbour; index.radiusSearch(q_flann, indices_flann, dists_flann, radius, search_params); return Rcpp::List::create(Rcpp::Named("indices") = indices_flann, Rcpp::Named("distances") = dists_flann); }
//' Check whether there are any non-finite values in a matrix //' //' The C++ functions will not work with NA values, and the calculation of the //' summary profile will take a long time to run before crashing. //' //' @param matPtr matrix to check. //' //' @return //' Throws an error if any \code{NA}, \code{NaN}, \code{Inf}, or \code{-Inf} //' values are found, otherwise returns silently. //' // [[Rcpp::export]] void CheckFinite(Rcpp::NumericMatrix matPtr) { arma::mat mat = arma::mat(matPtr.begin(), matPtr.nrow(), matPtr.ncol(), false, true); arma::uvec nonFiniteIdx = arma::find_nonfinite(mat); if (nonFiniteIdx.n_elem > 0) { throw Rcpp::exception("matrices cannot have non-finite or missing values"); } }
void eigs_sym_shift_c( mat_op op, int n, int k, double sigma, const spectra_opts *opts, void *data, int *nconv, int *niter, int *nops, double *evals, double *evecs, int *info ) { BEGIN_RCPP CRealShift cmat_op(op, n, data); Rcpp::List res; try { res = run_eigs_shift_sym((RealShift*) &cmat_op, n, k, opts->ncv, opts->rule, sigma, opts->maxitr, opts->tol, opts->retvec != 0); *info = 0; } catch(...) { *info = 1; // indicates error } *nconv = Rcpp::as<int>(res["nconv"]); *niter = Rcpp::as<int>(res["niter"]); *nops = Rcpp::as<int>(res["nops"]); Rcpp::NumericVector val = res["values"]; std::copy(val.begin(), val.end(), evals); if(opts->retvec != 0) { Rcpp::NumericMatrix vec = res["vectors"]; std::copy(vec.begin(), vec.end(), evecs); } VOID_END_RCPP }
void ScoreGaussL0PenScatter::setData(Rcpp::List& data) { std::vector<int>::iterator vi; //uint i; // Cast preprocessed data from R list dout.level(2) << "Casting preprocessed data...\n"; _dataCount = Rcpp::as<std::vector<int> >(data["data.count"]); dout.level(3) << "# samples per vertex: " << _dataCount << "\n"; _totalDataCount = Rcpp::as<uint>(data["total.data.count"]); dout.level(3) << "Total # samples: " << _totalDataCount << "\n"; Rcpp::List scatter = data["scatter"]; Rcpp::NumericMatrix scatterMat; _disjointScatterMatrices.resize(scatter.size()); dout.level(3) << "# disjoint scatter matrices: " << scatter.size() << "\n"; for (R_len_t i = 0; i < scatter.size(); ++i) { scatterMat = Rcpp::NumericMatrix((SEXP)(scatter[i])); _disjointScatterMatrices[i] = arma::mat(scatterMat.begin(), scatterMat.nrow(), scatterMat.ncol(), false); } // Cast index of scatter matrices, adjust R indexing convention to C++ std::vector<int> scatterIndex = Rcpp::as<std::vector<int> >(data["scatter.index"]); for (std::size_t i = 0; i < scatterIndex.size(); ++i) _scatterMatrices[i] = &(_disjointScatterMatrices[scatterIndex[i] - 1]); // Cast lambda: penalty constant _lambda = Rcpp::as<double>(data["lambda"]); dout.level(3) << "Penalty parameter lambda: " << _lambda << "\n"; // Check whether an intercept should be calculated _allowIntercept = Rcpp::as<bool>(data["intercept"]); dout.level(3) << "Include intercept: " << _allowIntercept << "\n"; }
densePred::densePred(Rcpp::NumericMatrix x, Rcpp::NumericVector coef0) throw (std::runtime_error) : predMod(coef0), d_X(x), a_X(x.begin(), x.nrow(), x.ncol(), false, true) { if (d_coef0.size() != d_X.ncol()) throw std::runtime_error("length(coef0) != ncol(X)"); }
// [[Rcpp::export]] SEXP EtaIndep (Rcpp::NumericVector y_rcpp, Rcpp::NumericMatrix y_lagged_rcpp, Rcpp::NumericMatrix z_dependent_rcpp, Rcpp::NumericMatrix z_independent_rcpp, Rcpp::NumericVector beta_rcpp, Rcpp::NumericVector mu_rcpp, Rcpp::NumericVector sigma_rcpp, Rcpp::NumericMatrix gamma_dependent_rcpp, Rcpp::NumericVector gamma_independent_rcpp) { int M = mu_rcpp.size(); int n = y_rcpp.size(); arma::mat eta(n, M); arma::colvec y(y_rcpp.begin(), y_rcpp.size(), false); arma::mat y_lagged(y_lagged_rcpp.begin(), y_lagged_rcpp.nrow(), y_lagged_rcpp.ncol(), false); arma::mat z_dependent(z_dependent_rcpp.begin(), z_dependent_rcpp.nrow(), z_dependent_rcpp.ncol(), false); arma::mat z_independent(z_independent_rcpp.begin(), z_independent_rcpp.nrow(), z_independent_rcpp.ncol(), false); arma::colvec beta(beta_rcpp.begin(), beta_rcpp.size(), false); arma::colvec mu(mu_rcpp.begin(), mu_rcpp.size(), false); arma::colvec sigma(sigma_rcpp.begin(), sigma_rcpp.size(), false); arma::mat gamma_dependent(gamma_dependent_rcpp.begin(), gamma_dependent_rcpp.nrow(), gamma_dependent_rcpp.ncol(), false); arma::colvec gamma_independent(gamma_independent_rcpp.begin(), gamma_independent_rcpp.size(), false); for (int j = 0; j < M; j++) { eta.col(j) = y - y_lagged * beta - z_dependent * gamma_dependent.col(j) - z_independent * gamma_independent - mu(j); eta.col(j) = eta.col(j) % eta.col(j); // element-wise multiplication eta.col(j) = exp(-eta.col(j) / (2 * (sigma(j) * sigma(j)))); eta.col(j) = eta.col(j) / (SQRT2PI * sigma(j)); } return (wrap(eta)); }
// [[Rcpp::export]] arma::mat MVGAUSS(Rcpp::NumericMatrix OMEGA_, int n, int seed) { // std::srand(12523); arma::mat OMEGA( OMEGA_.begin(), OMEGA_.nrow(), OMEGA_.ncol(), false ); arma::vec eigval; arma::mat eigvec; arma::eig_sym(eigval,eigvec, OMEGA); int ncol = OMEGA.n_cols; arma::mat X = arma::randn<arma::mat>(n,ncol); eigval = arma::sqrt(eigval); arma::mat Z = arma::diagmat(eigval); X = eigvec * Z * X.t(); return(X.t()); }
RcppExport SEXP rthxpos(SEXP m) { Rcpp::NumericMatrix tmpm = Rcpp::NumericMatrix(m); int nr = tmpm.nrow(); int nc = tmpm.ncol(); thrust::device_vector<double> dmat(tmpm.begin(),tmpm.end()); // make space for the transpose thrust::device_vector<double> dxp(nr*nc); // iterator to march through the matrix elements thrust::counting_iterator<int> seqb(0); thrust::counting_iterator<int> seqe = seqb + nr*nc; // for each i in seq, copy the matrix elt to its spot in the // transpose thrust::for_each(seqb,seqe, copyelt2xp(dmat.begin(),dxp.begin(),nr,nc)); // prepare the R output, and return it Rcpp::NumericVector routmat(nc*nr); thrust::copy(dxp.begin(),dxp.end(),routmat.begin()); return routmat; }
/** * Update L, Ut and cu for new weights. * * Update Ut from Zt and sqrtXwt, then L from Lambda and Ut * Update cu from wtres, Lambda and Ut. * * @param Xwt Matrix of weights for the model matrix * @param wtres weighted residuals */ void reModule::reweight(Rcpp::NumericMatrix const& Xwt, Rcpp::NumericVector const& wtres) { double mone = -1., one = 1.; int Wnc = Xwt.ncol(); if (d_Ut) M_cholmod_free_sparse(&d_Ut, &c); if (Wnc == 1) { d_Ut = M_cholmod_copy_sparse(&d_Zt, &c); chmDn csqrtX(Xwt); M_cholmod_scale(&csqrtX, CHOLMOD_COL, d_Ut, &c); } else { int n = Xwt.nrow(); CHM_TR tr = M_cholmod_sparse_to_triplet(&d_Zt, &c); int *j = (int*)tr->j, nnz = tr->nnz; double *x = (double*)tr->x, *W = Xwt.begin(); for (int k = 0; k < nnz; k++) { x[k] *= W[j[k]]; j[k] = j[k] % n; } tr->ncol = (size_t)n; d_Ut = M_cholmod_triplet_to_sparse(tr, nnz, &c); M_cholmod_free_triplet(&tr, &c); } // update the factor L CHM_SP LambdatUt = d_Lambda.crossprod(d_Ut); d_L.update(*LambdatUt, 1.); d_ldL2 = d_L.logDet2(); // update cu chmDn ccu(d_cu), cwtres(wtres); copy(d_u0.begin(), d_u0.end(), d_cu.begin()); M_cholmod_sdmult(LambdatUt, 0/*trans*/, &one, &mone, &cwtres, &ccu, &c); M_cholmod_free_sparse(&LambdatUt, &c); NumericMatrix ans = d_L.solve(CHOLMOD_L, d_L.solve(CHOLMOD_P, d_cu)); copy(ans.begin(), ans.end(), d_cu.begin()); d_CcNumer = inner_product(d_cu.begin(), d_cu.end(), d_cu.begin(), double()); }
///' Calculate the network properties, data matrix not provided ///' ///' @details ///' \subsection{Input expectations:}{ ///' Note that this function expects all inputs to be sensible, as checked by ///' the R function 'checkUserInput' and processed by 'networkProperties'. ///' ///' These requirements are: ///' \itemize{ ///' \item{'net' is a square matrix, and its rownames are identical to its ///' column names.} ///' \item{'moduleAssigments' is a named character vector, where the names ///' represent node labels found in the discovery dataset. Unlike ///' 'PermutationProcedure', these may include nodes that are not ///' present in 'data' and 'net'.} ///' \item{The module labels specified in 'modules' must occur in ///' 'moduleAssignments'.} ///' } ///' } ///' ///' @param net adjacency matrix of network edge weights between all pairs of ///' nodes in the dataset in which to calculate the network properties. ///' @param moduleAssignments a named character vector containing the module ///' each node belongs to in the discovery dataset. ///' @param modules a character vector of modules for which to calculate the ///' network properties for. ///' ///' @return a list containing the summary profile, node contribution, module ///' coherence, weighted degree, and average edge weight for each 'module'. ///' ///' @keywords internal // [[Rcpp::export]] Rcpp::List NetPropsNoData ( Rcpp::NumericMatrix net, Rcpp::CharacterVector moduleAssignments, Rcpp::CharacterVector modules ) { // convert the colnames / rownames to C++ equivalents const std::vector<std::string> nodeNames (Rcpp::as<std::vector<std::string>>(colnames(net))); unsigned int nNodes = net.ncol(); R_CheckUserInterrupt(); /* Next, we need to create two mappings: * - From node IDs to indices in the dataset of interest * - From modules to node IDs * - From modules to only node IDs present in the dataset of interest */ const namemap nodeIdxMap = MakeIdxMap(nodeNames); const stringmap modNodeMap = MakeModMap(moduleAssignments); const stringmap modNodePresentMap = MakeModMap(moduleAssignments, nodeIdxMap); // What modules do we actually want to analyse? const std::vector<std::string> mods (Rcpp::as<std::vector<std::string>>(modules)); R_CheckUserInterrupt(); // Calculate the network properties for each module std::string mod; // iterators unsigned int mNodesPresent, mNodes; arma::uvec nodeIdx, propIdx, nodeRank; namemap propIdxMap; std::vector<std::string> modNodeNames; arma::vec WD; // results containers double avgWeight; Rcpp::NumericVector degree; // for casting to R equivalents Rcpp::List results; // final storage container for (auto mi = mods.begin(); mi != mods.end(); ++mi) { // What nodes are in this module? // modNodeNames = names(moduleAssignments[moduleAssignments == mod]) mod = *mi; modNodeNames = GetModNodeNames(mod, modNodeMap); // initialise results containers with NA values for nodes not present in // the dataset we're calculating the network properties in. degree = Rcpp::NumericVector(modNodeNames.size(), NA_REAL); avgWeight = NA_REAL; degree.names() = modNodeNames; // Create a mapping between node names and the result vectors propIdxMap = MakeIdxMap(modNodeNames); // Get just the indices of nodes that are present in the requested dataset nodeIdx = GetNodeIdx(mod, modNodePresentMap, nodeIdxMap); mNodesPresent = nodeIdx.n_elem; // And a mapping of those nodes to the initialised vectors propIdx = GetNodeIdx(mod, modNodePresentMap, propIdxMap); mNodes = propIdx.n_elem; // Calculate the properties if the module has nodes in the test dataset if (nodeIdx.n_elem > 0) { // sort the node indices for sequential memory access nodeRank = SortNodes(nodeIdx.memptr(), mNodesPresent); WD = WeightedDegree(net.begin(), nNodes, nodeIdx.memptr(), mNodesPresent); WD = WD(nodeRank); // reorder results avgWeight = AverageEdgeWeight(WD.memptr(), WD.n_elem); R_CheckUserInterrupt(); // Fill the results vectors appropriately Fill(degree, WD.memptr(), mNodesPresent, propIdx.memptr(), mNodes); } results.push_back( Rcpp::List::create( Rcpp::Named("degree") = degree, Rcpp::Named("avgWeight") = avgWeight ) ); } results.names() = mods; return(results); }
densePred::densePred(Rcpp::NumericMatrix x) throw (std::runtime_error) : predMod(x.ncol()), d_X(x), a_X(x.begin(), x.nrow(), x.ncol(), false, true) { }
// [[Rcpp::export]] Rcpp::List rangerCpp(uint treetype, std::string dependent_variable_name, Rcpp::NumericMatrix input_data, std::vector<std::string> variable_names, uint mtry, uint num_trees, bool verbose, uint seed, uint num_threads, bool write_forest, uint importance_mode_r, uint min_node_size, std::vector<std::vector<double>>& split_select_weights, bool use_split_select_weights, std::vector<std::string>& always_split_variable_names, bool use_always_split_variable_names, std::string status_variable_name, bool prediction_mode, Rcpp::List loaded_forest, Rcpp::RawMatrix sparse_data, bool sample_with_replacement, bool probability, std::vector<std::string>& unordered_variable_names, bool use_unordered_variable_names, bool save_memory, uint splitrule_r, std::vector<double>& case_weights, bool use_case_weights, bool predict_all, bool keep_inbag, double sample_fraction, double alpha, double minprop, bool holdout, uint prediction_type_r) { Rcpp::List result; Forest* forest = 0; Data* data = 0; try { // Empty split select weights and always split variables if not used if (!use_split_select_weights) { split_select_weights.clear(); } if (!use_always_split_variable_names) { always_split_variable_names.clear(); } if (!use_unordered_variable_names) { unordered_variable_names.clear(); } if (!use_case_weights) { case_weights.clear(); } std::ostream* verbose_out; if (verbose) { verbose_out = &Rcpp::Rcout; } else { verbose_out = new std::stringstream; } size_t num_rows = input_data.nrow(); size_t num_cols = input_data.ncol(); // Initialize data with double memmode data = new DataDouble(input_data.begin(), variable_names, num_rows, num_cols); // If there is sparse data, add it if (sparse_data.nrow() > 1) { data->addSparseData(sparse_data.begin(), sparse_data.ncol()); } switch (treetype) { case TREE_CLASSIFICATION: if (probability) { forest = new ForestProbability; } else { forest = new ForestClassification; } break; case TREE_REGRESSION: forest = new ForestRegression; break; case TREE_SURVIVAL: forest = new ForestSurvival; break; case TREE_PROBABILITY: forest = new ForestProbability; break; } ImportanceMode importance_mode = (ImportanceMode) importance_mode_r; SplitRule splitrule = (SplitRule) splitrule_r; PredictionType prediction_type = (PredictionType) prediction_type_r; // Init Ranger forest->initR(dependent_variable_name, data, mtry, num_trees, verbose_out, seed, num_threads, importance_mode, min_node_size, split_select_weights, always_split_variable_names, status_variable_name, prediction_mode, sample_with_replacement, unordered_variable_names, save_memory, splitrule, case_weights, predict_all, keep_inbag, sample_fraction, alpha, minprop, holdout, prediction_type); // Load forest object if in prediction mode if (prediction_mode) { size_t dependent_varID = loaded_forest["dependent.varID"]; //size_t num_trees = loaded_forest["num.trees"]; std::vector<std::vector<std::vector<size_t>> > child_nodeIDs = loaded_forest["child.nodeIDs"]; std::vector<std::vector<size_t>> split_varIDs = loaded_forest["split.varIDs"]; std::vector<std::vector<double>> split_values = loaded_forest["split.values"]; std::vector<bool> is_ordered = loaded_forest["is.ordered"]; if (treetype == TREE_CLASSIFICATION) { std::vector<double> class_values = loaded_forest["class.values"]; ((ForestClassification*) forest)->loadForest(dependent_varID, num_trees, child_nodeIDs, split_varIDs, split_values, class_values, is_ordered); } else if (treetype == TREE_REGRESSION) { ((ForestRegression*) forest)->loadForest(dependent_varID, num_trees, child_nodeIDs, split_varIDs, split_values, is_ordered); } else if (treetype == TREE_SURVIVAL) { size_t status_varID = loaded_forest["status.varID"]; std::vector<std::vector<std::vector<double>> > chf = loaded_forest["chf"]; std::vector<double> unique_timepoints = loaded_forest["unique.death.times"]; ((ForestSurvival*) forest)->loadForest(dependent_varID, num_trees, child_nodeIDs, split_varIDs, split_values, status_varID, chf, unique_timepoints, is_ordered); } else if (treetype == TREE_PROBABILITY) { std::vector<double> class_values = loaded_forest["class.values"]; std::vector<std::vector<std::vector<double>>>terminal_class_counts = loaded_forest["terminal.class.counts"]; ((ForestProbability*) forest)->loadForest(dependent_varID, num_trees, child_nodeIDs, split_varIDs, split_values, class_values, terminal_class_counts, is_ordered); } } // Run Ranger forest->run(false); if (use_split_select_weights && importance_mode != IMP_NONE) { *verbose_out << "Warning: Split select weights used. Variable importance measures are only comparable for variables with equal weights." << std::endl; } // Use first non-empty dimension of predictions const std::vector<std::vector<std::vector<double>>>& predictions = forest->getPredictions(); if (predictions.size() == 1) { if (predictions[0].size() == 1) { result.push_back(forest->getPredictions()[0][0], "predictions"); } else { result.push_back(forest->getPredictions()[0], "predictions"); } } else { result.push_back(forest->getPredictions(), "predictions"); } // Return output result.push_back(forest->getNumTrees(), "num.trees"); result.push_back(forest->getNumIndependentVariables(), "num.independent.variables"); if (treetype == TREE_SURVIVAL) { ForestSurvival* temp = (ForestSurvival*) forest; result.push_back(temp->getUniqueTimepoints(), "unique.death.times"); } if (!verbose) { std::stringstream temp; temp << verbose_out->rdbuf(); result.push_back(temp.str(), "log"); } if (!prediction_mode) { result.push_back(forest->getMtry(), "mtry"); result.push_back(forest->getMinNodeSize(), "min.node.size"); if (importance_mode != IMP_NONE) { result.push_back(forest->getVariableImportance(), "variable.importance"); } result.push_back(forest->getOverallPredictionError(), "prediction.error"); } if (keep_inbag) { result.push_back(forest->getInbagCounts(), "inbag.counts"); } // Save forest if needed if (write_forest) { Rcpp::List forest_object; forest_object.push_back(forest->getDependentVarId(), "dependent.varID"); forest_object.push_back(forest->getNumTrees(), "num.trees"); forest_object.push_back(forest->getChildNodeIDs(), "child.nodeIDs"); forest_object.push_back(forest->getSplitVarIDs(), "split.varIDs"); forest_object.push_back(forest->getSplitValues(), "split.values"); forest_object.push_back(forest->getIsOrderedVariable(), "is.ordered"); if (treetype == TREE_CLASSIFICATION) { ForestClassification* temp = (ForestClassification*) forest; forest_object.push_back(temp->getClassValues(), "class.values"); } else if (treetype == TREE_PROBABILITY) { ForestProbability* temp = (ForestProbability*) forest; forest_object.push_back(temp->getClassValues(), "class.values"); forest_object.push_back(temp->getTerminalClassCounts(), "terminal.class.counts"); } else if (treetype == TREE_SURVIVAL) { ForestSurvival* temp = (ForestSurvival*) forest; forest_object.push_back(temp->getStatusVarId(), "status.varID"); forest_object.push_back(temp->getChf(), "chf"); forest_object.push_back(temp->getUniqueTimepoints(), "unique.death.times"); } result.push_back(forest_object, "forest"); } delete forest; delete data; } catch (std::exception& e) { if (strcmp(e.what(), "User interrupt.") != 0) { Rcpp::Rcerr << "Error: " << e.what() << " Ranger will EXIT now." << std::endl; } delete forest; delete data; return result; } return result; }
RcppExport SEXP DEoptim(SEXP lowerS, SEXP upperS, SEXP fnS, SEXP controlS, SEXP rhoS) { try { Rcpp::NumericVector f_lower(lowerS), f_upper(upperS); // User-defined bounds Rcpp::List control(controlS); // named list of params double VTR = Rcpp::as<double>(control["VTR"]); // value to reach int i_strategy = Rcpp::as<int>(control["strategy"]); // chooses DE-strategy int i_itermax = Rcpp::as<int>(control["itermax"]); // Maximum number of generations long l_nfeval = 0; // nb of function evals (NOT passed in) int i_D = Rcpp::as<int>(control["npar"]); // Dimension of parameter vector int i_NP = Rcpp::as<int>(control["NP"]); // Number of population members int i_storepopfrom = Rcpp::as<int>(control["storepopfrom"]) - 1; // When to start storing populations int i_storepopfreq = Rcpp::as<int>(control["storepopfreq"]); // How often to store populations int i_specinitialpop = Rcpp::as<int>(control["specinitialpop"]);// User-defined inital population Rcpp::NumericMatrix initialpopm = Rcpp::as<Rcpp::NumericMatrix>(control["initialpop"]); double f_weight = Rcpp::as<double>(control["F"]); // stepsize double f_cross = Rcpp::as<double>(control["CR"]); // crossover probability int i_bs_flag = Rcpp::as<int>(control["bs"]); // Best of parent and child int i_trace = Rcpp::as<int>(control["trace"]); // Print progress? int i_check_winner = Rcpp::as<int>(control["checkWinner"]); // Re-evaluate best parameter vector? int i_av_winner = Rcpp::as<int>(control["avWinner"]); // Average double i_pPct = Rcpp::as<double>(control["p"]); // p to define the top 100p% best solutions arma::colvec minbound(f_lower.begin(), f_lower.size(), false); // convert Rcpp vectors to arma vectors arma::colvec maxbound(f_upper.begin(), f_upper.size(), false); arma::mat initpopm(initialpopm.begin(), initialpopm.rows(), initialpopm.cols(), false); arma::mat ta_popP(i_D, i_NP*2); // Data structures for parameter vectors arma::mat ta_oldP(i_D, i_NP); arma::mat ta_newP(i_D, i_NP); arma::colvec t_bestP(i_D); arma::colvec ta_popC(i_NP*2); // Data structures for obj. fun. values arma::colvec ta_oldC(i_NP); arma::colvec ta_newC(i_NP); double t_bestC; arma::colvec t_bestitP(i_D); arma::colvec t_tmpP(i_D); int i_nstorepop = ceil((i_itermax - i_storepopfrom) / i_storepopfreq); arma::mat d_pop(i_D, i_NP); Rcpp::List d_storepop(i_nstorepop); arma::mat d_bestmemit(i_D, i_itermax); arma::colvec d_bestvalit(i_itermax); int i_iter = 0; // call actual Differential Evolution optimization given the parameters devol(VTR, f_weight, f_cross, i_bs_flag, minbound, maxbound, fnS, rhoS, i_trace, i_strategy, i_D, i_NP, i_itermax, initpopm, i_storepopfrom, i_storepopfreq, i_specinitialpop, i_check_winner, i_av_winner, ta_popP, ta_oldP, ta_newP, t_bestP, ta_popC, ta_oldC, ta_newC, t_bestC, t_bestitP, t_tmpP, d_pop, d_storepop, d_bestmemit, d_bestvalit, i_iter, i_pPct, l_nfeval); return Rcpp::List::create(Rcpp::Named("bestmem") = t_bestP, // and return a named list with results to R Rcpp::Named("bestval") = t_bestC, Rcpp::Named("nfeval") = l_nfeval, Rcpp::Named("iter") = i_iter, Rcpp::Named("bestmemit") = trans(d_bestmemit), Rcpp::Named("bestvalit") = d_bestvalit, Rcpp::Named("pop") = trans(d_pop), Rcpp::Named("storepop") = d_storepop); } catch( std::exception& ex) { forward_exception_to_r(ex); } catch(...) { ::Rf_error( "c++ exception (unknown reason)"); } return R_NilValue; }
IndepTestGauss(uint sampleSize, Rcpp::NumericMatrix& cor) : _sampleSize(sampleSize), _correlation(cor.begin(), cor.nrow(), cor.ncol(), false) {}
//' rcpp_get_polygons //' //' Extracts all polygons from an overpass API query //' //' @param st Text contents of an overpass API query //' @return A \code{SpatialLinesDataFrame} contains all polygons and associated data // [[Rcpp::export]] Rcpp::S4 rcpp_get_polygons (const std::string& st) { #ifdef DUMP_INPUT { std::ofstream dump ("./get-polygons.xml"); if (dump.is_open()) { dump.write (st.c_str(), st.size()); } } #endif XmlPolys xml (st); const std::map <osmid_t, Node>& nodes = xml.nodes (); const std::map <osmid_t, OneWay>& ways = xml.ways (); const std::vector <Relation>& rels = xml.relations (); int count = 0; float xmin = FLOAT_MAX, xmax = -FLOAT_MAX, ymin = FLOAT_MAX, ymax = -FLOAT_MAX; std::vector <float> lons, lats; std::unordered_set <std::string> idset; // see TODO below std::vector <std::string> colnames, rownames, polynames; std::set <std::string> varnames; Rcpp::List dimnames (0), dummy_list (0); Rcpp::NumericMatrix nmat (Rcpp::Dimension (0, 0)); idset.clear (); colnames.push_back ("lon"); colnames.push_back ("lat"); varnames.insert ("name"); // other varnames added below /* * NOTE: Nodes are first loaded into the 2 vectors of (lon, lat), and these * are then copied into nmat. This intermediate can be avoided by loading * directly into nmat using direct indexing rather than iterators, however * this does *NOT* make the routine any faster, and so the current version * which more safely uses iterators is kept instead. */ Rcpp::Environment sp_env = Rcpp::Environment::namespace_env ("sp"); Rcpp::Function Polygon = sp_env ["Polygon"]; Rcpp::Language polygons_call ("new", "Polygons"); Rcpp::S4 polygons; /* * Polygons are extracted from the XmlPolys class in three setps: * 1. Get the names of all polygons that are part of multipolygon relations * 2. Get the names of any remaining ways that are polygonal (start == end) * 3. From the resultant list, extract the actual polygonal ways * * NOTE: OSM polygons are stored as ways, and thus all objects in the class * xmlPolys are rightly referred to as ways. Here within this Rcpp function, * these are referred to as Polygons, but the iteration is over the actual * polygonal ways. */ // Step#1 std::set <osmid_t> the_ways; // Set will only insert unique values for (auto it = rels.begin (); it != rels.end (); ++it) for (auto itw = (*it).ways.begin (); itw != (*it).ways.end (); ++itw) { assert (ways.find (itw->first) != ways.end ()); the_ways.insert (itw->first); } // Step#2 //const std::map <osmid_t, OneWay>& ways = xml.ways (); for (auto it = ways.begin (); it != ways.end (); ++it) { if (the_ways.find ((*it).first) == the_ways.end ()) if ((*it).second.nodes.begin () == (*it).second.nodes.end ()) the_ways.insert ((*it).first); } // Step#2b - Erase any ways that contain no data (should not happen). for (auto it = the_ways.begin (); it != the_ways.end (); ) { auto itw = ways.find (*it); if (itw->second.nodes.size () == 0) it = the_ways.erase (it); else ++it; } Rcpp::List polyList (the_ways.size ()); // Step#3 - Extract and store the_ways for (auto it = the_ways.begin (); it != the_ways.end (); ++it) { auto itw = ways.find (*it); // Collect all unique keys std::for_each (itw->second.key_val.begin (), itw->second.key_val.end (), [&](const std::pair <std::string, std::string>& p) { varnames.insert (p.first); }); /* * The following lines check for duplicate way IDs -- which do very * occasionally occur -- and ensures unique values as required by 'sp' * through appending decimal digits to <osmid_t> OSM IDs. */ std::string id = std::to_string (itw->first); int tempi = 0; while (idset.find (id) != idset.end ()) id = std::to_string (itw->first) + "." + std::to_string (tempi++); idset.insert (id); polynames.push_back (id); // Then iterate over nodes of that way and store all lat-lons size_t n = itw->second.nodes.size (); lons.clear (); lats.clear (); rownames.clear (); lons.reserve (n); lats.reserve (n); rownames.reserve (n); for (auto itn = itw->second.nodes.begin (); itn != itw->second.nodes.end (); ++itn) { // TODO: Propoer exception handler assert (nodes.find (*itn) != nodes.end ()); lons.push_back (nodes.find (*itn)->second.lon); lats.push_back (nodes.find (*itn)->second.lat); rownames.push_back (std::to_string (*itn)); } xmin = std::min (xmin, *std::min_element (lons.begin(), lons.end())); xmax = std::max (xmax, *std::max_element (lons.begin(), lons.end())); ymin = std::min (ymin, *std::min_element (lats.begin(), lats.end())); ymax = std::max (ymax, *std::max_element (lats.begin(), lats.end())); nmat = Rcpp::NumericMatrix (Rcpp::Dimension (lons.size (), 2)); std::copy (lons.begin (), lons.end (), nmat.begin ()); std::copy (lats.begin (), lats.end (), nmat.begin () + lons.size ()); // This only works with push_back, not with direct re-allocation dimnames.push_back (rownames); dimnames.push_back (colnames); nmat.attr ("dimnames") = dimnames; dimnames.erase (0, dimnames.size()); //Rcpp::S4 poly = Rcpp::Language ("Polygon", nmat).eval (); Rcpp::S4 poly = Polygon (nmat); dummy_list.push_back (poly); polygons = polygons_call.eval (); polygons.slot ("Polygons") = dummy_list; polygons.slot ("ID") = id; polyList [count++] = polygons; dummy_list.erase (0); } // end for it over the_ways polyList.attr ("names") = polynames; // Store all key-val pairs in one massive DF int nrow = the_ways.size (), ncol = varnames.size (); Rcpp::CharacterVector kv_vec (nrow * ncol, Rcpp::CharacterVector::get_na ()); int namecoli = std::distance (varnames.begin (), varnames.find ("name")); for (auto it = the_ways.begin (); it != the_ways.end (); ++it) { int rowi = std::distance (the_ways.begin (), it); auto itw = ways.find (*it); kv_vec (namecoli * nrow + rowi) = itw->second.name; for (auto kv_iter = itw->second.key_val.begin (); kv_iter != itw->second.key_val.end (); ++kv_iter) { const std::string& key = (*kv_iter).first; auto ni = varnames.find (key); // key must exist in varnames! int coli = std::distance (varnames.begin (), ni); kv_vec (coli * nrow + rowi) = (*kv_iter).second; } } Rcpp::Language sp_polys_call ("new", "SpatialPolygonsDataFrame"); Rcpp::S4 sp_polys = sp_polys_call.eval (); sp_polys.slot ("polygons") = polyList; sp_polys.slot ("bbox") = rcpp_get_bbox (xmin, xmax, ymin, ymax); Rcpp::Language crs_call ("new", "CRS"); Rcpp::S4 crs = crs_call.eval (); crs.slot ("projargs") = "+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs +towgs84=0,0,0"; sp_polys.slot ("proj4string") = crs; Rcpp::CharacterMatrix kv_mat (nrow, ncol, kv_vec.begin()); Rcpp::DataFrame kv_df = kv_mat; kv_df.attr ("names") = varnames; sp_polys.slot ("data") = kv_df; return sp_polys; }
///' Calculate the network properties ///' ///' @details ///' \subsection{Input expectations:}{ ///' Note that this function expects all inputs to be sensible, as checked by ///' the R function 'checkUserInput' and processed by 'networkProperties'. ///' ///' These requirements are: ///' \itemize{ ///' \item{The ordering of node names across 'data' and 'net' is consistent.} ///' \item{The columns of 'data' are the nodes.} ///' \item{'net' is a square matrix, and its rownames are identical to its ///' column names.} ///' \item{'moduleAssigments' is a named character vector, where the names ///' represent node labels found in the discovery dataset. Unlike ///' 'PermutationProcedure', these may include nodes that are not ///' present in 'data' and 'net'.} ///' \item{The module labels specified in 'modules' must occur in ///' 'moduleAssignments'.} ///' } ///' } ///' ///' @param data data matrix from the dataset in which to calculate the network ///' properties. ///' @param net adjacency matrix of network edge weights between all pairs of ///' nodes in the dataset in which to calculate the network properties. ///' @param moduleAssignments a named character vector containing the module ///' each node belongs to in the discovery dataset. ///' @param modules a character vector of modules for which to calculate the ///' network properties for. ///' ///' @return a list containing the summary profile, node contribution, module ///' coherence, weighted degree, and average edge weight for each 'module'. ///' ///' @keywords internal // [[Rcpp::export]] Rcpp::List NetProps ( Rcpp::NumericMatrix data, Rcpp::NumericMatrix net, Rcpp::CharacterVector moduleAssignments, Rcpp::CharacterVector modules ) { // First, scale the matrix data unsigned int nSamples = data.nrow(); unsigned int nNodes = data.ncol(); arma::mat scaledData = Scale(data.begin(), nSamples, nNodes); R_CheckUserInterrupt(); // convert the colnames / rownames to C++ equivalents const std::vector<std::string> nodeNames (Rcpp::as<std::vector<std::string>>(colnames(net))); const std::vector<std::string> sampleNames (Rcpp::as<std::vector<std::string>>(rownames(data))); /* Next, we need to create two mappings: * - From node IDs to indices in the dataset of interest * - From modules to node IDs * - From modules to only node IDs present in the dataset of interest */ const namemap nodeIdxMap = MakeIdxMap(nodeNames); const stringmap modNodeMap = MakeModMap(moduleAssignments); const stringmap modNodePresentMap = MakeModMap(moduleAssignments, nodeIdxMap); // What modules do we actually want to analyse? const std::vector<std::string> mods (Rcpp::as<std::vector<std::string>>(modules)); R_CheckUserInterrupt(); // Calculate the network properties for each module std::string mod; // iterators unsigned int mNodesPresent, mNodes; arma::uvec nodeIdx, propIdx, nodeRank; namemap propIdxMap; std::vector<std::string> modNodeNames; arma::vec WD, SP, NC; // results containers double avgWeight, coherence; Rcpp::NumericVector degree, summary, contribution; // for casting to R equivalents Rcpp::List results; // final storage container for (auto mi = mods.begin(); mi != mods.end(); ++mi) { // What nodes are in this module? mod = *mi; modNodeNames = GetModNodeNames(mod, modNodeMap); // initialise results containers with NA values for nodes not present in // the dataset we're calculating the network properties in. degree = Rcpp::NumericVector(modNodeNames.size(), NA_REAL); contribution = Rcpp::NumericVector(modNodeNames.size(), NA_REAL); summary = Rcpp::NumericVector(nSamples, NA_REAL); avgWeight = NA_REAL; coherence = NA_REAL; degree.names() = modNodeNames; contribution.names() = modNodeNames; // Create a mapping between node names and the result vectors propIdxMap = MakeIdxMap(modNodeNames); // Get just the indices of nodes that are present in the requested dataset nodeIdx = GetNodeIdx(mod, modNodePresentMap, nodeIdxMap); mNodesPresent = nodeIdx.n_elem; // And a mapping of those nodes to the initialised vectors propIdx = GetNodeIdx(mod, modNodePresentMap, propIdxMap); mNodes = propIdx.n_elem; // Calculate the properties if the module has nodes in the test dataset if (nodeIdx.n_elem > 0) { // sort the node indices for sequential memory access nodeRank = SortNodes(nodeIdx.memptr(), mNodesPresent); WD = WeightedDegree(net.begin(), nNodes, nodeIdx.memptr(), mNodesPresent); WD = WD(nodeRank); // reorder results avgWeight = AverageEdgeWeight(WD.memptr(), WD.n_elem); R_CheckUserInterrupt(); SP = SummaryProfile(scaledData.memptr(), nSamples, nNodes, nodeIdx.memptr(), mNodesPresent); R_CheckUserInterrupt(); NC = NodeContribution(scaledData.memptr(), nSamples, nNodes, nodeIdx.memptr(), mNodesPresent, SP.memptr()); NC = NC(nodeRank); // reorder results coherence = ModuleCoherence(NC.memptr(), mNodesPresent); R_CheckUserInterrupt(); // Convert NaNs to NAs SP.elem(arma::find_nonfinite(SP)).fill(NA_REAL); NC.elem(arma::find_nonfinite(NC)).fill(NA_REAL); if (!arma::is_finite(coherence)) { coherence = NA_REAL; } // Fill results vectors Fill(degree, WD.memptr(), mNodesPresent, propIdx.memptr(), mNodes); Fill(contribution, NC.memptr(), mNodesPresent, propIdx.memptr(), mNodes); summary = Rcpp::NumericVector(SP.begin(), SP.end()); } summary.names() = sampleNames; results.push_back( Rcpp::List::create( Rcpp::Named("summary") = summary, Rcpp::Named("contribution") = contribution, Rcpp::Named("coherence") = coherence, Rcpp::Named("degree") = degree, Rcpp::Named("avgWeight") = avgWeight ) ); } results.names() = mods; return(results); }
// [[Rcpp::export]] Rcpp::List DEoptim_impl(const arma::colvec & minbound, // user-defined lower bounds const arma::colvec & maxbound, // user-defined upper bounds SEXP fnS, // function to be optimized, either R or C++ const Rcpp::List & control, // parameters SEXP rhoS) { // optional environment double VTR = Rcpp::as<double>(control["VTR"]); // value to reach int i_strategy = Rcpp::as<int>(control["strategy"]); // chooses DE-strategy int i_itermax = Rcpp::as<int>(control["itermax"]); // Maximum number of generations long l_nfeval = 0; // nb of function evals (NOT passed in) int i_D = Rcpp::as<int>(control["npar"]); // Dimension of parameter vector int i_NP = Rcpp::as<int>(control["NP"]); // Number of population members int i_storepopfrom = Rcpp::as<int>(control["storepopfrom"]) - 1; // When to start storing populations int i_storepopfreq = Rcpp::as<int>(control["storepopfreq"]); // How often to store populations int i_specinitialpop = Rcpp::as<int>(control["specinitialpop"]); // User-defined inital population double f_weight = Rcpp::as<double>(control["F"]); // stepsize double f_cross = Rcpp::as<double>(control["CR"]); // crossover probability int i_bs_flag = Rcpp::as<int>(control["bs"]); // Best of parent and child int i_trace = Rcpp::as<int>(control["trace"]); // Print progress? double i_pPct = Rcpp::as<double>(control["p"]); // p to define the top 100p% best solutions double d_c = Rcpp::as<double>(control["c"]); // c as a trigger of the JADE algorithm double d_reltol = Rcpp::as<double>(control["reltol"]); // tolerance for relative convergence test, default to be sqrt(.Machine$double.eps) int i_steptol = Rcpp::as<double>(control["steptol"]); // maximum of iteration after relative convergence test is passed, default to be itermax // as above, doing it in two steps is faster Rcpp::NumericMatrix initialpopm = Rcpp::as<Rcpp::NumericMatrix>(control["initialpop"]); arma::mat initpopm(initialpopm.begin(), initialpopm.rows(), initialpopm.cols(), false); arma::mat ta_popP(i_D, i_NP*2); // Data structures for parameter vectors arma::mat ta_oldP(i_D, i_NP); arma::mat ta_newP(i_D, i_NP); arma::colvec t_bestP(i_D); arma::colvec ta_popC(i_NP*2); // Data structures for obj. fun. values arma::colvec ta_oldC(i_NP); arma::colvec ta_newC(i_NP); double t_bestC; arma::colvec t_bestitP(i_D); arma::colvec t_tmpP(i_D); int i_nstorepop = static_cast<int>(ceil(static_cast<double>((i_itermax - i_storepopfrom) / i_storepopfreq))); arma::mat d_pop(i_D, i_NP); Rcpp::List d_storepop(i_nstorepop); arma::mat d_bestmemit(i_D, i_itermax); arma::colvec d_bestvalit(i_itermax); int i_iter = 0; // call actual Differential Evolution optimization given the parameters devol(VTR, f_weight, f_cross, i_bs_flag, minbound, maxbound, fnS, rhoS, i_trace, i_strategy, i_D, i_NP, i_itermax, initpopm, i_storepopfrom, i_storepopfreq, i_specinitialpop, ta_popP, ta_oldP, ta_newP, t_bestP, ta_popC, ta_oldC, ta_newC, t_bestC, t_bestitP, t_tmpP, d_pop, d_storepop, d_bestmemit, d_bestvalit, i_iter, i_pPct, d_c, l_nfeval, d_reltol, i_steptol); return Rcpp::List::create(Rcpp::Named("bestmem") = t_bestP, // and return a named list with results to R Rcpp::Named("bestval") = t_bestC, Rcpp::Named("nfeval") = l_nfeval, Rcpp::Named("iter") = i_iter, Rcpp::Named("bestmemit") = trans(d_bestmemit), Rcpp::Named("bestvalit") = d_bestvalit, Rcpp::Named("pop") = trans(d_pop), Rcpp::Named("storepop") = d_storepop); }
// [[Rcpp::export]] SEXP EMcppARMSIQuZhuo (Rcpp::NumericVector y_rcpp, Rcpp::NumericMatrix y_lagged_rcpp, Rcpp::NumericMatrix z_dependent_rcpp, Rcpp::NumericMatrix z_independent_rcpp, Rcpp::NumericMatrix beta0_rcpp, Rcpp::NumericVector mu0_rcpp, Rcpp::NumericVector sigma0_rcpp, Rcpp::NumericMatrix gamma_dependent0_rcpp, Rcpp::NumericMatrix gamma_independent0_rcpp, Rcpp::NumericMatrix transition_probs0_rcpp, Rcpp::NumericVector initial_dist0_rcpp, int maxit, double epsilon, double transition_probs_min, double transition_probs_max, double sigma_min) { // conversion first arma::colvec y(y_rcpp.begin(), y_rcpp.size(), false); arma::mat y_lagged(y_lagged_rcpp.begin(), y_lagged_rcpp.nrow(), y_lagged_rcpp.ncol(), false); arma::mat z_dependent(z_dependent_rcpp.begin(), z_dependent_rcpp.nrow(), z_dependent_rcpp.ncol(), false); arma::mat z_independent(z_independent_rcpp.begin(), z_independent_rcpp.nrow(), z_independent_rcpp.ncol(), false); arma::mat beta0(beta0_rcpp.begin(), beta0_rcpp.nrow(), beta0_rcpp.ncol(), false); arma::colvec mu0(mu0_rcpp.begin(), mu0_rcpp.size(), false); arma::colvec sigma0(sigma0_rcpp.begin(), sigma0_rcpp.size(), false); arma::mat gamma_dependent0(gamma_dependent0_rcpp.begin(), gamma_dependent0_rcpp.nrow(), gamma_dependent0_rcpp.ncol(), false); arma::mat gamma_independent0(gamma_independent0_rcpp.begin(), gamma_independent0_rcpp.nrow(), gamma_independent0_rcpp.ncol(), false); arma::mat transition_probs0(transition_probs0_rcpp.begin(), transition_probs0_rcpp.nrow(), transition_probs0_rcpp.ncol(), false); arma::colvec initial_dist0(initial_dist0_rcpp.begin(), initial_dist0_rcpp.size(), false); arma::mat y_lagged_t = y_lagged.t(); arma::mat z_dependent_t = z_dependent.t(); arma::mat z_independent_t = z_independent.t(); arma::colvec likelihoods(maxit, arma::fill::zeros); int index_exit = -1; Theta theta_original(beta0, mu0, sigma0, gamma_dependent0, gamma_independent0, transition_probs0, initial_dist0); Theta theta_updated = theta_original; Theta theta = theta_updated; theta.likelihood = -std::numeric_limits<double>::infinity(); likelihoods(0) = -std::numeric_limits<double>::infinity(); // 1. EM iterations for (int i = 1; i < maxit; i++) { index_exit++; Xi e_step = ExpectationStepARMSIQuZhuo(&y, &y_lagged, &z_dependent, &z_independent, &theta_updated); likelihoods(i) = theta_updated.likelihood; // Stop if 1. the difference in likelihoods is small enough // or 2. likelihood decreases (a decrease is due to locating local max. // out of hard constraints in maximization step, which suggests that this // is not a good candidate anyway.) // Use negative condition to stop if 3. the updated likelihood is NaN. if (!((theta_updated.likelihood - theta.likelihood) >= epsilon)) break; theta = theta_updated; theta_updated = MaximizationStepARMSIQuZhuo(&y, &y_lagged, &z_dependent, &z_independent, &y_lagged_t, &z_dependent_t, &z_independent_t, &theta, &(e_step.xi_k), &(e_step.xi_past_t), &(e_step.xi_n), transition_probs_min, transition_probs_max, sigma_min); // printf("sigma updated at %d: %f", i, theta_updated.sigma(0)); } likelihoods = likelihoods.subvec(0, std::min((index_exit + 1), (maxit - 1))); // remove unused elements Rcpp::List theta_R = Rcpp::List::create(Named("beta") = wrap(theta.beta), Named("mu") = wrap(theta.mu), Named("sigma") = wrap(theta.sigma), Named("gamma.dependent") = wrap(theta.gamma_dependent), Named("gamma.independent") = wrap(theta.gamma_independent), Named("transition.probs") = wrap(theta.transition_probs), Named("initial.dist") = wrap(theta.initial_dist) ); return Rcpp::List::create(Named("theta") = wrap(theta_R), Named("likelihood") = wrap(theta.likelihood), Named("likelihoods") = wrap(likelihoods)); }
///' Calculate the intermediate network properties in the discovery dataset ///' ///' These properties are need at every permutation: so they will be computed ///' once. ///' ///' @details ///' \subsection{Input expectations:}{ ///' Note that this function expects all inputs to be sensible, as checked by ///' the R function 'checkUserInput' and processed by 'modulePreservation'. ///' ///' These requirements are: ///' \itemize{ ///' \item{The ordering of node names across 'dData', 'dCorr', and 'dNet' is ///' consistent.} ///' \item{The columns of 'dData' are the nodes.} ///' \item{'dData' has been scaled by 'Scale'.} ///' \item{'dCorr' and 'dNet' are square matrices, and their rownames are ///' identical to their column names.} ///' \item{'moduleAssigments' is a named character vector, where the names ///' represent node labels found in the discovery dataset (e.g. 'dNet').} ///' } ///' } ///' ///' @param dData scaled data matrix from the \emph{discovery} dataset. ///' @param dCorr matrix of correlation coefficients between all pairs of ///' variables/nodes in the \emph{discovery} dataset. ///' @param dNet adjacency matrix of network edge weights between all pairs of ///' nodes in the \emph{discovery} dataset. ///' @param tNodeNames a character vector of node names in the test dataset ///' @param moduleAssignments a named character vector containing the module ///' each node belongs to in the discovery dataset. ///' @param modules a character vector of modules for which to calculate the ///' module preservation statistics. ///' ///' @return a list containing three lists: a list of weighted degree vectors, ///' a list of correlation coefficient vectors, and a list of node ///' contribution vectors. There is one vector for each module in each list. ///' ///' @keywords internal // [[Rcpp::export]] Rcpp::List IntermediateProperties ( Rcpp::NumericMatrix dData, Rcpp::NumericMatrix dCorr, Rcpp::NumericMatrix dNet, Rcpp::CharacterVector tNodeNames, Rcpp::CharacterVector moduleAssignments, Rcpp::CharacterVector modules ) { // First, scale the matrix data unsigned int nSamples = dData.nrow(); unsigned int nNodes = dData.ncol(); R_CheckUserInterrupt(); // convert the colnames / rownames to C++ equivalents const std::vector<std::string> dNames (Rcpp::as<std::vector<std::string>>(colnames(dNet))); const std::vector<std::string> tNames (Rcpp::as<std::vector<std::string>>(tNodeNames)); /* Next, we need to create three mappings: * - From node IDs to indices in the discovery dataset. * - From modules to all node IDs. * - From modules to just node IDs present in the test dataset. */ const namemap dIdxMap = MakeIdxMap(dNames); const stringmap modNodeMap = MakeModMap(moduleAssignments); const namemap tIdxMap = MakeIdxMap(tNames); const stringmap modNodePresentMap = MakeModMap(moduleAssignments, tIdxMap); // What modules do we actually want to analyse? const std::vector<std::string> mods (Rcpp::as<std::vector<std::string>>(modules)); // We only need to iterate through modules which have nodes in the test // dataset std::vector<std::string> modsPresent; for (auto it = mods.begin(); it != mods.end(); ++it) { if (modNodePresentMap.count(*it) > 0) { modsPresent.push_back(*it); } } R_CheckUserInterrupt(); Rcpp::List degree; Rcpp::List corr; Rcpp::List contribution; // Calculate the network properties in the discovery dataset. std::string mod; unsigned int mNodes; arma::uvec dIdx, dRank; arma::vec dSP, dWD, dCV, dNC; for (auto mi = modsPresent.begin(); mi != modsPresent.end(); ++mi) { // Get the node indices in the discovery dataset for this module mod = *mi; dIdx = GetNodeIdx(mod, modNodePresentMap, dIdxMap); mNodes = dIdx.n_elem; R_CheckUserInterrupt(); // Calculate the network properties and insert into their storage containers dCV = CorrVector(dCorr.begin(), nNodes, dIdx.memptr(), mNodes); R_CheckUserInterrupt(); // Sort node indices for sequential memory access dRank = SortNodes(dIdx.memptr(), mNodes); dWD = WeightedDegree(dNet.begin(), nNodes, dIdx.memptr(), mNodes); dWD = dWD(dRank); // reorder R_CheckUserInterrupt(); dSP = SummaryProfile(dData.begin(), nSamples, nNodes, dIdx.memptr(), mNodes); R_CheckUserInterrupt(); dNC = NodeContribution(dData.begin(), nSamples, nNodes, dIdx.memptr(), mNodes, dSP.memptr()); dNC = dNC(dRank); // reorder results R_CheckUserInterrupt(); // Cast to R-vectors and add to results lists corr.push_back(Rcpp::NumericVector(dCV.begin(), dCV.end())); degree.push_back(Rcpp::NumericVector(dWD.begin(), dWD.end())); contribution.push_back(Rcpp::NumericVector(dNC.begin(), dNC.end())); } degree.names() = modsPresent; corr.names() = modsPresent; contribution.names() = modsPresent; return Rcpp::List::create( Rcpp::Named("degree") = degree, Rcpp::Named("corr") = corr, Rcpp::Named("contribution") = contribution ); }
// Returns an n by 1 column that represents the likelihoods of // an estimated univariate MS-AR model for each k where 1 \leq k \leq n where // beta is switching. // Note that even if beta is non-switching, setting beta as a s by M matrix with // repeated column of the original beta will give you the likelihood for // MS-AR model with non-switching beta. // TODO: Implement the version where z_dependent/z_independent exist(lagged variables should be implemented for MSM models) // [[Rcpp::export]] SEXP LikelihoodsMSMAR (Rcpp::NumericVector y_rcpp, Rcpp::NumericMatrix y_lagged_rcpp, Rcpp::NumericMatrix z_dependent_rcpp, Rcpp::NumericMatrix z_independent_rcpp, Rcpp::NumericMatrix z_dependent_lagged_rcpp, Rcpp::NumericMatrix z_independent_lagged_rcpp, Rcpp::NumericMatrix transition_probs_rcpp, Rcpp::NumericVector initial_dist_extended_rcpp, Rcpp::NumericMatrix beta_rcpp, Rcpp::NumericVector mu_rcpp, Rcpp::NumericVector sigma_rcpp, Rcpp::NumericMatrix gamma_dependent_rcpp, Rcpp::NumericVector gamma_independent_rcpp, Rcpp::IntegerMatrix state_conversion_mat_rcpp ) { int n = y_rcpp.size(); arma::colvec y(y_rcpp.begin(), y_rcpp.size(), false); arma::mat y_lagged(y_lagged_rcpp.begin(), y_lagged_rcpp.nrow(), y_lagged_rcpp.ncol(), false); arma::mat z_dependent(z_dependent_rcpp.begin(), z_dependent_rcpp.nrow(), z_dependent_rcpp.ncol(), false); arma::mat z_dependent_lagged(z_dependent_lagged_rcpp.begin(), z_dependent_lagged_rcpp.nrow(), z_dependent_lagged_rcpp.ncol(), false); arma::mat z_independent(z_independent_rcpp.begin(), z_independent_rcpp.nrow(), z_independent_rcpp.ncol(), false); arma::mat z_independent_lagged(z_independent_lagged_rcpp.begin(), z_independent_lagged_rcpp.nrow(), z_independent_lagged_rcpp.ncol(), false); arma::mat transition_probs(transition_probs_rcpp.begin(), transition_probs_rcpp.nrow(), transition_probs_rcpp.ncol(), false); arma::colvec initial_dist_extended(initial_dist_extended_rcpp.begin(), initial_dist_extended_rcpp.size(), false); arma::mat beta(beta_rcpp.begin(), beta_rcpp.nrow(), beta_rcpp.ncol(), false); arma::colvec mu(mu_rcpp.begin(), mu_rcpp.size(), false); arma::colvec sigma(sigma_rcpp.begin(), sigma_rcpp.size(), false); arma::mat gamma_dependent(gamma_dependent_rcpp.begin(), gamma_dependent_rcpp.nrow(), gamma_dependent_rcpp.ncol(), false); arma::colvec gamma_independent(gamma_independent_rcpp.begin(), gamma_independent_rcpp.size(), false); arma::imat state_conversion_mat(state_conversion_mat_rcpp.begin(), state_conversion_mat_rcpp.nrow(), state_conversion_mat_rcpp.ncol(), false); arma::mat transition_probs_extended = GetExtendedTransitionProbs( transition_probs, state_conversion_mat); arma::mat transition_probs_extended_t = transition_probs_extended.t(); arma::colvec likelihoods(n, arma::fill::zeros); int M_extended = transition_probs_extended_t.n_rows; int M = gamma_dependent.n_cols; int s = beta.n_rows; int M_extended_block = IntPower(M, s); arma::mat* xi_k_t = new arma::mat(M_extended, n, arma::fill::zeros); // make a transpose first for col operations. // partition blocks int p = gamma_dependent.n_rows; int q = gamma_independent.size(); arma::mat* z_dependent_lagged_blocks = new arma::mat[s]; arma::mat* z_independent_lagged_blocks = new arma::mat[s]; for (int i = 0; i < s; i++) { int z_dependent_block_first = i * p; int z_independent_block_first = i * q; z_dependent_lagged_blocks[i] = z_dependent_lagged.cols(z_dependent_block_first, z_dependent_block_first + p - 1); z_independent_lagged_blocks[i] = z_independent_lagged.cols(z_independent_block_first, z_independent_block_first + q - 1); } for (int k = 0; k < n; k++) { // initial setting; keep track of minimum value and its index // to divide everything by the min. value in order to prevent // possible numerical errors when computing posterior probs. int min_index = -1; double min_value = std::numeric_limits<double>::infinity(); double* ratios = new double[M_extended]; double row_sum = 0; arma::colvec xi_past; if (k > 0) xi_past = transition_probs_extended_t * exp(xi_k_t->col(k-1)); else xi_past = initial_dist_extended; xi_past /= arma::sum(xi_past); for (int j_M = 0; j_M < M; j_M++) { for (int j_extra = 0; j_extra < M_extended_block; j_extra++) { int j = j_M * M_extended_block + j_extra; arma::colvec xi_k_t_jk = y.row(k); // arma::colvec xi_k_t_jk = y.row(k) - // z_dependent.row(k) * gamma_dependent.col(j_M) - // z_independent.row(k) * gamma_independent - mu(j_M); for (int lag = 0; lag < s; lag++) { int lagged_index = state_conversion_mat.at((lag + 1), j); xi_k_t_jk -= beta.at(lag, j_M) * (y_lagged.at(k, lag) - mu(lagged_index)); xi_k_t_jk -= beta.at(lag, j_M) * (y_lagged.at(k, lag) - z_dependent_lagged_blocks[lag].row(k) * gamma_dependent.col(lagged_index) - z_independent_lagged_blocks[lag].row(k) * gamma_independent - mu(lagged_index)); } xi_k_t->at(j,k) = xi_k_t_jk(0); // explicit gluing xi_k_t->at(j,k) *= xi_k_t->at(j,k); xi_k_t->at(j,k) = xi_k_t->at(j,k) / (2 * (sigma(j_M) * sigma(j_M))); if (min_value > xi_k_t->at(j,k)) { min_value = xi_k_t->at(j,k); min_index = j; } // SQRT2PI only matters in calculation of eta; // you can remove it in the final log-likelihood. ratios[j] = xi_past(j) / sigma(j_M); } } for (int j = 0; j < M_extended; j++) { if (j == min_index) row_sum += 1.0; else row_sum += (ratios[j] / ratios[min_index]) * exp(min_value - xi_k_t->at(j,k)); xi_k_t->at(j,k) += log(ratios[j]); } likelihoods(k) = log(row_sum) - min_value + log(ratios[min_index]) - LOG2PI_OVERTWO; delete[] ratios; // clear memory } arma::exp(xi_k_t->cols(1,4)).t().print(); // clear memory for blocks delete[] z_dependent_lagged_blocks; delete[] z_independent_lagged_blocks; delete xi_k_t; return (wrap(likelihoods)); }