Esempio n. 1
0
// [[Rcpp::export]]
SEXP FilterIndepOld  (Rcpp::NumericVector y_rcpp,
					Rcpp::NumericMatrix y_lagged_rcpp,
					Rcpp::NumericMatrix z_dependent_rcpp,
					Rcpp::NumericMatrix z_independent_rcpp,
					Rcpp::NumericVector beta_rcpp,
					Rcpp::NumericVector mu_rcpp,
					Rcpp::NumericVector sigma_rcpp,
					Rcpp::NumericMatrix gamma_dependent_rcpp,
					Rcpp::NumericVector gamma_independent_rcpp,
					Rcpp::NumericMatrix transition_probs_rcpp,
					Rcpp::NumericVector initial_dist_rcpp
					)
{
	int n = y_rcpp.size();
	int M = mu_rcpp.size();
	arma::mat xi_k_t(M, n); // make a transpose first for easier column operations.

	arma::colvec y(y_rcpp.begin(), y_rcpp.size(), false);
	arma::mat    y_lagged(y_lagged_rcpp.begin(),
								y_lagged_rcpp.nrow(), y_lagged_rcpp.ncol(), false);
	arma::mat    z_dependent(z_dependent_rcpp.begin(),
								z_dependent_rcpp.nrow(), z_dependent_rcpp.ncol(), false);
	arma::mat    z_independent(z_independent_rcpp.begin(),
								z_independent_rcpp.nrow(), z_independent_rcpp.ncol(), false);
	arma::colvec beta(beta_rcpp.begin(), beta_rcpp.size(), false);
	arma::colvec mu(mu_rcpp.begin(), mu_rcpp.size(), false);
	arma::colvec sigma(sigma_rcpp.begin(), sigma_rcpp.size(), false);
	arma::mat    gamma_dependent(gamma_dependent_rcpp.begin(),
								gamma_dependent_rcpp.nrow(), gamma_dependent_rcpp.ncol(), false);
	arma::colvec gamma_independent(gamma_independent_rcpp.begin(),
								gamma_independent_rcpp.size(), false);
	arma::mat    transition_probs(transition_probs_rcpp.begin(),
								transition_probs_rcpp.nrow(), transition_probs_rcpp.ncol(), false);
	arma::colvec initial_dist(initial_dist_rcpp.begin(), initial_dist_rcpp.size(), false);

	double likelihood = 0;

	SEXP eta_rcpp = EtaIndep(y_rcpp, y_lagged_rcpp,
						z_dependent_rcpp, z_independent_rcpp,
						beta_rcpp, mu_rcpp, sigma_rcpp,
						gamma_dependent_rcpp, gamma_independent_rcpp);
	arma::mat eta_t = (Rcpp::as<arma::mat>(eta_rcpp)).t();

	xi_k_t.col(0) = eta_t.col(0) % initial_dist;
	double total = sum(xi_k_t.col(0));
	xi_k_t.col(0) = xi_k_t.col(0) / total;
	likelihood += log(total);

	for (int k = 1; k < n; k++)
	{
	  	xi_k_t.col(k) = eta_t.col(k) % (transition_probs * xi_k_t.col(k-1));
	 	total = sum(xi_k_t.col(k));
		xi_k_t.col(k) = xi_k_t.col(k) / total;
		likelihood += log(total);
	}

	return Rcpp::List::create(Named("xi.k") = wrap(xi_k_t.t()),
														Named("likelihood") = wrap(likelihood));
}
Esempio n. 2
0
    double sPredModule::solveCoef(double wrss) {
	Rcpp::NumericMatrix
	    cc = d_fac.solve(CHOLMOD_L, d_fac.solve(CHOLMOD_P, d_Vtr));
	double ans = sqrt(inner_product(cc.begin(), cc.end(),
					cc.begin(), double())/wrss);
	Rcpp::NumericMatrix
	    bb = d_fac.solve(CHOLMOD_Pt, d_fac.solve(CHOLMOD_Lt, cc));
	copy(bb.begin(), bb.end(), d_coef.begin());
	return ans;
    }
Esempio n. 3
0
Rcpp::List RadiusSearch(Rcpp::NumericMatrix query_,
                        Rcpp::NumericMatrix ref_,
                        double radius,
                        int max_neighbour,
                        std::string build,
                        int cores,
                        int checks) {
  const std::size_t n_dim = query_.ncol();
  const std::size_t n_query = query_.nrow();
  const std::size_t n_ref = ref_.nrow();
  // Column major to row major
  arma::mat query(n_dim, n_query);
  {
    arma::mat temp_q(query_.begin(), n_query, n_dim, false);
    query = arma::trans(temp_q);
  }
  flann::Matrix<double> q_flann(query.memptr(), n_query, n_dim);
  arma::mat ref(n_dim, n_ref);
  {
    arma::mat temp_r(ref_.begin(), n_ref, n_dim, false);
    ref = arma::trans(temp_r);
  }
  flann::Matrix<double> ref_flann(ref.memptr(), n_ref, n_dim);
  // Setting the flann index params
  flann::IndexParams params;
  if (build == "kdtree") {
    params = flann::KDTreeSingleIndexParams(1);
  } else if (build == "kmeans") {
    params = flann::KMeansIndexParams(2, 10, flann::FLANN_CENTERS_RANDOM, 0.2);
  } else if (build == "linear") {
    params = flann::LinearIndexParams();
  }
  // Perform the radius search
  flann::Index<flann::L2<double> > index(ref_flann, params);
  index.buildIndex();
  std::vector< std::vector<int> >
      indices_flann(n_query, std::vector<int>(max_neighbour));
  std::vector< std::vector<double> >
      dists_flann(n_query, std::vector<double>(max_neighbour));
  flann::SearchParams search_params;
  search_params.cores = cores;
  search_params.checks = checks;
  search_params.max_neighbors = max_neighbour;
  index.radiusSearch(q_flann, indices_flann, dists_flann, radius,
                     search_params);
  return Rcpp::List::create(Rcpp::Named("indices") = indices_flann,
                            Rcpp::Named("distances") = dists_flann);
}
Esempio n. 4
0
//' Check whether there are any non-finite values in a matrix
//'
//' The C++ functions will not work with NA values, and the calculation of the
//' summary profile will take a long time to run before crashing.
//'
//' @param matPtr matrix to check.
//' 
//' @return
//'  Throws an error if any \code{NA}, \code{NaN}, \code{Inf}, or \code{-Inf}
//'  values are found, otherwise returns silently.
//' 
// [[Rcpp::export]]
void CheckFinite(Rcpp::NumericMatrix matPtr) {
  arma::mat mat = arma::mat(matPtr.begin(), matPtr.nrow(), matPtr.ncol(), false, true);
  arma::uvec nonFiniteIdx = arma::find_nonfinite(mat);
  if (nonFiniteIdx.n_elem > 0) {
    throw Rcpp::exception("matrices cannot have non-finite or missing values");
  } 
}
Esempio n. 5
0
void eigs_sym_shift_c(
    mat_op op, int n, int k, double sigma,
    const spectra_opts *opts, void *data,
    int *nconv, int *niter, int *nops,
    double *evals, double *evecs, int *info
)
{
    BEGIN_RCPP

    CRealShift cmat_op(op, n, data);
    Rcpp::List res;
    try {
        res = run_eigs_shift_sym((RealShift*) &cmat_op, n, k, opts->ncv, opts->rule,
                                 sigma, opts->maxitr, opts->tol, opts->retvec != 0);
        *info = 0;
    } catch(...) {
        *info = 1;  // indicates error
    }

    *nconv = Rcpp::as<int>(res["nconv"]);
    *niter = Rcpp::as<int>(res["niter"]);
    *nops  = Rcpp::as<int>(res["nops"]);
    Rcpp::NumericVector val = res["values"];
    std::copy(val.begin(), val.end(), evals);
    if(opts->retvec != 0)
    {
        Rcpp::NumericMatrix vec = res["vectors"];
        std::copy(vec.begin(), vec.end(), evecs);
    }

    VOID_END_RCPP
}
Esempio n. 6
0
void ScoreGaussL0PenScatter::setData(Rcpp::List& data)
{
	std::vector<int>::iterator vi;
	//uint i;

	// Cast preprocessed data from R list
	dout.level(2) << "Casting preprocessed data...\n";
	_dataCount = Rcpp::as<std::vector<int> >(data["data.count"]);
	dout.level(3) << "# samples per vertex: " << _dataCount << "\n";
	_totalDataCount = Rcpp::as<uint>(data["total.data.count"]);
	dout.level(3) << "Total # samples: " << _totalDataCount << "\n";
	Rcpp::List scatter = data["scatter"];
	Rcpp::NumericMatrix scatterMat;
	_disjointScatterMatrices.resize(scatter.size());
	dout.level(3) << "# disjoint scatter matrices: " << scatter.size() << "\n";
	for (R_len_t i = 0; i < scatter.size(); ++i) {
		scatterMat = Rcpp::NumericMatrix((SEXP)(scatter[i]));
		_disjointScatterMatrices[i] = arma::mat(scatterMat.begin(), scatterMat.nrow(), scatterMat.ncol(), false);
	}

	// Cast index of scatter matrices, adjust R indexing convention to C++
	std::vector<int> scatterIndex = Rcpp::as<std::vector<int> >(data["scatter.index"]);
	for (std::size_t i = 0; i < scatterIndex.size(); ++i)
		_scatterMatrices[i] = &(_disjointScatterMatrices[scatterIndex[i] - 1]);

	// Cast lambda: penalty constant
	_lambda = Rcpp::as<double>(data["lambda"]);
	dout.level(3) << "Penalty parameter lambda: " << _lambda << "\n";

	// Check whether an intercept should be calculated
	_allowIntercept = Rcpp::as<bool>(data["intercept"]);
	dout.level(3) << "Include intercept: " << _allowIntercept << "\n";
}
Esempio n. 7
0
    densePred::densePred(Rcpp::NumericMatrix x,
			 Rcpp::NumericVector coef0)
	throw (std::runtime_error)
	: predMod(coef0), d_X(x),
	  a_X(x.begin(), x.nrow(), x.ncol(), false, true) {
	if (d_coef0.size() != d_X.ncol())
	    throw std::runtime_error("length(coef0) != ncol(X)");
    }
Esempio n. 8
0
// [[Rcpp::export]]
SEXP EtaIndep  (Rcpp::NumericVector y_rcpp,
				Rcpp::NumericMatrix y_lagged_rcpp,
				Rcpp::NumericMatrix z_dependent_rcpp,
				Rcpp::NumericMatrix z_independent_rcpp,
				Rcpp::NumericVector beta_rcpp,
				Rcpp::NumericVector mu_rcpp,
				Rcpp::NumericVector sigma_rcpp,
				Rcpp::NumericMatrix gamma_dependent_rcpp,
				Rcpp::NumericVector gamma_independent_rcpp)
{
	int M = mu_rcpp.size();
	int n = y_rcpp.size();
	arma::mat eta(n, M);

	arma::colvec y(y_rcpp.begin(), y_rcpp.size(), false);
	arma::mat    y_lagged(y_lagged_rcpp.begin(),
												y_lagged_rcpp.nrow(), y_lagged_rcpp.ncol(), false);
	arma::mat    z_dependent(z_dependent_rcpp.begin(),
													z_dependent_rcpp.nrow(), z_dependent_rcpp.ncol(), false);
	arma::mat    z_independent(z_independent_rcpp.begin(),
														z_independent_rcpp.nrow(), z_independent_rcpp.ncol(), false);
	arma::colvec beta(beta_rcpp.begin(), beta_rcpp.size(), false);
	arma::colvec mu(mu_rcpp.begin(), mu_rcpp.size(), false);
	arma::colvec sigma(sigma_rcpp.begin(), sigma_rcpp.size(), false);
	arma::mat    gamma_dependent(gamma_dependent_rcpp.begin(),
																gamma_dependent_rcpp.nrow(), gamma_dependent_rcpp.ncol(), false);
	arma::colvec gamma_independent(gamma_independent_rcpp.begin(),
																	gamma_independent_rcpp.size(), false);


	for (int j = 0; j < M; j++)
	{
  	eta.col(j) = y - y_lagged * beta -
								z_dependent * gamma_dependent.col(j) -
								z_independent * gamma_independent - mu(j);
		eta.col(j) = eta.col(j) % eta.col(j); // element-wise multiplication
		eta.col(j) = exp(-eta.col(j) / (2 * (sigma(j) * sigma(j))));
		eta.col(j) = eta.col(j) / (SQRT2PI * sigma(j));
	}

	return (wrap(eta));
}
Esempio n. 9
0
// [[Rcpp::export]]
arma::mat MVGAUSS(Rcpp::NumericMatrix OMEGA_, int n, int seed) {

  //  std::srand(12523);

  arma::mat OMEGA( OMEGA_.begin(), OMEGA_.nrow(), OMEGA_.ncol(), false );

  arma::vec eigval;
  arma::mat eigvec;
  arma::eig_sym(eigval,eigvec, OMEGA);

 int ncol = OMEGA.n_cols;

  arma::mat X = arma::randn<arma::mat>(n,ncol);

  eigval = arma::sqrt(eigval);
  arma::mat Z = arma::diagmat(eigval);
  X = eigvec * Z * X.t();
  return(X.t());
}
Esempio n. 10
0
RcppExport SEXP rthxpos(SEXP m) 
{
   Rcpp::NumericMatrix tmpm = Rcpp::NumericMatrix(m);
   int nr = tmpm.nrow();
   int nc = tmpm.ncol();
   thrust::device_vector<double> dmat(tmpm.begin(),tmpm.end());
   // make space for the transpose
   thrust::device_vector<double> dxp(nr*nc);
   // iterator to march through the matrix elements
   thrust::counting_iterator<int> seqb(0);
   thrust::counting_iterator<int> seqe = seqb + nr*nc;
   // for each i in seq, copy the matrix elt to its spot in the
   // transpose
   thrust::for_each(seqb,seqe,
      copyelt2xp(dmat.begin(),dxp.begin(),nr,nc));
   // prepare the R output, and return it
   Rcpp::NumericVector routmat(nc*nr);
   thrust::copy(dxp.begin(),dxp.end(),routmat.begin());
   return routmat;
}
Esempio n. 11
0
    /** 
     * Update L, Ut and cu for new weights.
     *
     * Update Ut from Zt and sqrtXwt, then L from Lambda and Ut
     * Update cu from wtres, Lambda and Ut.
     * 
     * @param Xwt Matrix of weights for the model matrix
     * @param wtres weighted residuals
     */
    void reModule::reweight(Rcpp::NumericMatrix const&   Xwt,
			    Rcpp::NumericVector const& wtres) {
	double mone = -1., one = 1.; 
	int Wnc = Xwt.ncol();
	if (d_Ut) M_cholmod_free_sparse(&d_Ut, &c);
	if (Wnc == 1) {
	    d_Ut = M_cholmod_copy_sparse(&d_Zt, &c);
	    chmDn csqrtX(Xwt);
	    M_cholmod_scale(&csqrtX, CHOLMOD_COL, d_Ut, &c);
	} else {
	    int n = Xwt.nrow();
	    CHM_TR tr = M_cholmod_sparse_to_triplet(&d_Zt, &c);
	    int *j = (int*)tr->j, nnz = tr->nnz;
	    double *x = (double*)tr->x, *W = Xwt.begin();
	    for (int k = 0; k < nnz; k++) {
		x[k] *= W[j[k]];
		j[k] = j[k] % n;
	    }
	    tr->ncol = (size_t)n;

	    d_Ut = M_cholmod_triplet_to_sparse(tr, nnz, &c);
	    M_cholmod_free_triplet(&tr, &c);
	}
				// update the factor L
	CHM_SP LambdatUt = d_Lambda.crossprod(d_Ut);
	d_L.update(*LambdatUt, 1.);
	d_ldL2 = d_L.logDet2();
				// update cu
	chmDn ccu(d_cu), cwtres(wtres);
	copy(d_u0.begin(), d_u0.end(), d_cu.begin());
	M_cholmod_sdmult(LambdatUt, 0/*trans*/, &one, &mone, &cwtres, &ccu, &c);
	M_cholmod_free_sparse(&LambdatUt, &c);
	NumericMatrix
	    ans = d_L.solve(CHOLMOD_L, d_L.solve(CHOLMOD_P, d_cu));
	copy(ans.begin(), ans.end(), d_cu.begin());
	d_CcNumer = inner_product(d_cu.begin(), d_cu.end(), d_cu.begin(), double());
    }
Esempio n. 12
0
///' Calculate the network properties, data matrix not provided
///' 
///' @details
///' \subsection{Input expectations:}{
///'   Note that this function expects all inputs to be sensible, as checked by
///'   the R function 'checkUserInput' and processed by 'networkProperties'. 
///'   
///'   These requirements are:
///'   \itemize{
///'   \item{'net' is a square matrix, and its rownames are identical to its 
///'         column names.}
///'   \item{'moduleAssigments' is a named character vector, where the names
///'         represent node labels found in the discovery dataset. Unlike 
///'         'PermutationProcedure', these may include nodes that are not 
///'         present in 'data' and 'net'.}
///'   \item{The module labels specified in 'modules' must occur in 
///'         'moduleAssignments'.}
///'   }
///' }
///' 
///' @param net adjacency matrix of network edge weights between all pairs of 
///'   nodes in the dataset in which to calculate the network properties.
///' @param moduleAssignments a named character vector containing the module 
///'   each node belongs to in the discovery dataset. 
///' @param modules a character vector of modules for which to calculate the 
///'   network properties for.
///' 
///' @return a list containing the summary profile, node contribution, module
///'   coherence, weighted degree, and average edge weight for each 'module'.
///'   
///' @keywords internal
// [[Rcpp::export]]
Rcpp::List NetPropsNoData (
    Rcpp::NumericMatrix net, 
    Rcpp::CharacterVector moduleAssignments,
    Rcpp::CharacterVector modules
) {
  // convert the colnames / rownames to C++ equivalents
  const std::vector<std::string> nodeNames (Rcpp::as<std::vector<std::string>>(colnames(net)));
  unsigned int nNodes = net.ncol();
  
  R_CheckUserInterrupt(); 
  
  /* Next, we need to create two mappings:
  *  - From node IDs to indices in the dataset of interest
  *  - From modules to node IDs
  *  - From modules to only node IDs present in the dataset of interest
  */
  const namemap nodeIdxMap = MakeIdxMap(nodeNames);
  const stringmap modNodeMap = MakeModMap(moduleAssignments);
  const stringmap modNodePresentMap = MakeModMap(moduleAssignments, nodeIdxMap);
  
  // What modules do we actually want to analyse?
  const std::vector<std::string> mods (Rcpp::as<std::vector<std::string>>(modules));
  
  R_CheckUserInterrupt(); 
  
  // Calculate the network properties for each module
  std::string mod; // iterators
  unsigned int mNodesPresent, mNodes;
  arma::uvec nodeIdx, propIdx, nodeRank;
  namemap propIdxMap;
  std::vector<std::string> modNodeNames; 
  arma::vec WD; // results containers
  double avgWeight; 
  Rcpp::NumericVector degree; // for casting to R equivalents
  Rcpp::List results; // final storage container
  for (auto mi = mods.begin(); mi != mods.end(); ++mi) {
    // What nodes are in this module?
    // modNodeNames = names(moduleAssignments[moduleAssignments == mod])
    mod = *mi;
    modNodeNames = GetModNodeNames(mod, modNodeMap);
    
    // initialise results containers with NA values for nodes not present in
    // the dataset we're calculating the network properties in.
    degree = Rcpp::NumericVector(modNodeNames.size(), NA_REAL);
    avgWeight = NA_REAL;
    degree.names() = modNodeNames;
    
    // Create a mapping between node names and the result vectors
    propIdxMap = MakeIdxMap(modNodeNames);
    
    // Get just the indices of nodes that are present in the requested dataset
    nodeIdx = GetNodeIdx(mod, modNodePresentMap, nodeIdxMap);
    mNodesPresent = nodeIdx.n_elem;
    
    // And a mapping of those nodes to the initialised vectors
    propIdx = GetNodeIdx(mod, modNodePresentMap, propIdxMap);
    mNodes = propIdx.n_elem;

    // Calculate the properties if the module has nodes in the test dataset
    if (nodeIdx.n_elem > 0) {
      // sort the node indices for sequential memory access
      nodeRank = SortNodes(nodeIdx.memptr(), mNodesPresent);
      
      WD = WeightedDegree(net.begin(), nNodes, nodeIdx.memptr(), mNodesPresent);
      WD = WD(nodeRank); // reorder results
      
      avgWeight = AverageEdgeWeight(WD.memptr(), WD.n_elem);
      R_CheckUserInterrupt(); 
      
      // Fill the results vectors appropriately
      Fill(degree, WD.memptr(), mNodesPresent, propIdx.memptr(), mNodes);
    }

    results.push_back(
      Rcpp::List::create(
        Rcpp::Named("degree") = degree,
        Rcpp::Named("avgWeight") = avgWeight
      )
    );
  }
  results.names() = mods;
  
  return(results);
}
Esempio n. 13
0
    densePred::densePred(Rcpp::NumericMatrix x)
	throw (std::runtime_error)
	: predMod(x.ncol()), d_X(x),
	  a_X(x.begin(), x.nrow(), x.ncol(), false, true) {
    }
Esempio n. 14
0
// [[Rcpp::export]]
Rcpp::List rangerCpp(uint treetype, std::string dependent_variable_name,
    Rcpp::NumericMatrix input_data, std::vector<std::string> variable_names, uint mtry, uint num_trees, bool verbose,
    uint seed, uint num_threads, bool write_forest, uint importance_mode_r, uint min_node_size,
    std::vector<std::vector<double>>& split_select_weights, bool use_split_select_weights,
    std::vector<std::string>& always_split_variable_names, bool use_always_split_variable_names,
    std::string status_variable_name, bool prediction_mode, Rcpp::List loaded_forest, Rcpp::RawMatrix sparse_data,
    bool sample_with_replacement, bool probability, std::vector<std::string>& unordered_variable_names,
    bool use_unordered_variable_names, bool save_memory, uint splitrule_r, 
    std::vector<double>& case_weights, bool use_case_weights, bool predict_all, 
    bool keep_inbag, double sample_fraction, double alpha, double minprop, bool holdout, uint prediction_type_r) {

  Rcpp::List result;
  Forest* forest = 0;
  Data* data = 0;
  try {

    // Empty split select weights and always split variables if not used
    if (!use_split_select_weights) {
      split_select_weights.clear();
    }
    if (!use_always_split_variable_names) {
      always_split_variable_names.clear();
    }
    if (!use_unordered_variable_names) {
      unordered_variable_names.clear();
    }
    if (!use_case_weights) {
      case_weights.clear();
    }

    std::ostream* verbose_out;
    if (verbose) {
      verbose_out = &Rcpp::Rcout;
    } else {
      verbose_out = new std::stringstream;
    }

    size_t num_rows = input_data.nrow();
    size_t num_cols = input_data.ncol();

    // Initialize data with double memmode
    data = new DataDouble(input_data.begin(), variable_names, num_rows, num_cols);

    // If there is sparse data, add it
    if (sparse_data.nrow() > 1) {
      data->addSparseData(sparse_data.begin(), sparse_data.ncol());
    }

    switch (treetype) {
    case TREE_CLASSIFICATION:
      if (probability) {
        forest = new ForestProbability;
      } else {
        forest = new ForestClassification;
      }
      break;
    case TREE_REGRESSION:
      forest = new ForestRegression;
      break;
    case TREE_SURVIVAL:
      forest = new ForestSurvival;
      break;
    case TREE_PROBABILITY:
      forest = new ForestProbability;
      break;
    }

    ImportanceMode importance_mode = (ImportanceMode) importance_mode_r;
    SplitRule splitrule = (SplitRule) splitrule_r;
    PredictionType prediction_type = (PredictionType) prediction_type_r;

    // Init Ranger
    forest->initR(dependent_variable_name, data, mtry, num_trees, verbose_out, seed, num_threads,
        importance_mode, min_node_size, split_select_weights, always_split_variable_names, status_variable_name,
        prediction_mode, sample_with_replacement, unordered_variable_names, save_memory, splitrule, case_weights, 
        predict_all, keep_inbag, sample_fraction, alpha, minprop, holdout, prediction_type);

    // Load forest object if in prediction mode
    if (prediction_mode) {
      size_t dependent_varID = loaded_forest["dependent.varID"];
      //size_t num_trees = loaded_forest["num.trees"];
      std::vector<std::vector<std::vector<size_t>> > child_nodeIDs = loaded_forest["child.nodeIDs"];
      std::vector<std::vector<size_t>> split_varIDs = loaded_forest["split.varIDs"];
      std::vector<std::vector<double>> split_values = loaded_forest["split.values"];
      std::vector<bool> is_ordered = loaded_forest["is.ordered"];

      if (treetype == TREE_CLASSIFICATION) {
        std::vector<double> class_values = loaded_forest["class.values"];
        ((ForestClassification*) forest)->loadForest(dependent_varID, num_trees, child_nodeIDs, split_varIDs,
            split_values, class_values, is_ordered);
      } else if (treetype == TREE_REGRESSION) {
        ((ForestRegression*) forest)->loadForest(dependent_varID, num_trees, child_nodeIDs, split_varIDs, split_values,
            is_ordered);
      } else if (treetype == TREE_SURVIVAL) {
        size_t status_varID = loaded_forest["status.varID"];
        std::vector<std::vector<std::vector<double>> > chf = loaded_forest["chf"];
        std::vector<double> unique_timepoints = loaded_forest["unique.death.times"];
        ((ForestSurvival*) forest)->loadForest(dependent_varID, num_trees, child_nodeIDs, split_varIDs, split_values,
            status_varID, chf, unique_timepoints, is_ordered);
      } else if (treetype == TREE_PROBABILITY) {
        std::vector<double> class_values = loaded_forest["class.values"];
        std::vector<std::vector<std::vector<double>>>terminal_class_counts =
        loaded_forest["terminal.class.counts"];
        ((ForestProbability*) forest)->loadForest(dependent_varID, num_trees, child_nodeIDs, split_varIDs, split_values,
            class_values, terminal_class_counts, is_ordered);
      }
    }

    // Run Ranger
    forest->run(false);

    if (use_split_select_weights && importance_mode != IMP_NONE) {
      *verbose_out
          << "Warning: Split select weights used. Variable importance measures are only comparable for variables with equal weights."
          << std::endl;
    }
    
    // Use first non-empty dimension of predictions
    const std::vector<std::vector<std::vector<double>>>& predictions = forest->getPredictions();
    if (predictions.size() == 1) {
      if (predictions[0].size() == 1) {
        result.push_back(forest->getPredictions()[0][0], "predictions");
      } else {
        result.push_back(forest->getPredictions()[0], "predictions");
      }
    } else {
      result.push_back(forest->getPredictions(), "predictions");
    }
    
    // Return output
    result.push_back(forest->getNumTrees(), "num.trees");
    result.push_back(forest->getNumIndependentVariables(), "num.independent.variables");
    if (treetype == TREE_SURVIVAL) {
      ForestSurvival* temp = (ForestSurvival*) forest;
      result.push_back(temp->getUniqueTimepoints(), "unique.death.times");
    }
    if (!verbose) {
      std::stringstream temp;
      temp << verbose_out->rdbuf();
      result.push_back(temp.str(), "log");
    }
    if (!prediction_mode) {
      result.push_back(forest->getMtry(), "mtry");
      result.push_back(forest->getMinNodeSize(), "min.node.size");
      if (importance_mode != IMP_NONE) {
        result.push_back(forest->getVariableImportance(), "variable.importance");
      }
      result.push_back(forest->getOverallPredictionError(), "prediction.error");
    }
    
    if (keep_inbag) {
      result.push_back(forest->getInbagCounts(), "inbag.counts");
    }

    // Save forest if needed
    if (write_forest) {
      Rcpp::List forest_object;
      forest_object.push_back(forest->getDependentVarId(), "dependent.varID");
      forest_object.push_back(forest->getNumTrees(), "num.trees");
      forest_object.push_back(forest->getChildNodeIDs(), "child.nodeIDs");
      forest_object.push_back(forest->getSplitVarIDs(), "split.varIDs");
      forest_object.push_back(forest->getSplitValues(), "split.values");
      forest_object.push_back(forest->getIsOrderedVariable(), "is.ordered");

      if (treetype == TREE_CLASSIFICATION) {
        ForestClassification* temp = (ForestClassification*) forest;
        forest_object.push_back(temp->getClassValues(), "class.values");
      } else if (treetype == TREE_PROBABILITY) {
        ForestProbability* temp = (ForestProbability*) forest;
        forest_object.push_back(temp->getClassValues(), "class.values");
        forest_object.push_back(temp->getTerminalClassCounts(), "terminal.class.counts");
      } else if (treetype == TREE_SURVIVAL) {
        ForestSurvival* temp = (ForestSurvival*) forest;
        forest_object.push_back(temp->getStatusVarId(), "status.varID");
        forest_object.push_back(temp->getChf(), "chf");
        forest_object.push_back(temp->getUniqueTimepoints(), "unique.death.times");
      }
      result.push_back(forest_object, "forest");
    }

    delete forest;
    delete data;
  } catch (std::exception& e) {
    if (strcmp(e.what(), "User interrupt.") != 0) {
      Rcpp::Rcerr << "Error: " << e.what() << " Ranger will EXIT now." << std::endl;
    }
    delete forest;
    delete data;
    return result;
  }

  return result;
}
Esempio n. 15
0
RcppExport SEXP DEoptim(SEXP lowerS, SEXP upperS, SEXP fnS, SEXP controlS, SEXP rhoS) {
    
    try {
	Rcpp::NumericVector  f_lower(lowerS), f_upper(upperS); 		// User-defined bounds
	Rcpp::List           control(controlS); 			// named list of params

	double VTR           = Rcpp::as<double>(control["VTR"]);	// value to reach
	int i_strategy       = Rcpp::as<int>(control["strategy"]);    	// chooses DE-strategy
	int i_itermax        = Rcpp::as<int>(control["itermax"]);	// Maximum number of generations
	long l_nfeval        = 0;					// nb of function evals (NOT passed in)
	int i_D              = Rcpp::as<int>(control["npar"]);		// Dimension of parameter vector
	int i_NP             = Rcpp::as<int>(control["NP"]);		// Number of population members
	int i_storepopfrom   = Rcpp::as<int>(control["storepopfrom"]) - 1;  // When to start storing populations 
	int i_storepopfreq   = Rcpp::as<int>(control["storepopfreq"]);  // How often to store populations 
	int i_specinitialpop = Rcpp::as<int>(control["specinitialpop"]);// User-defined inital population 
	Rcpp::NumericMatrix initialpopm = Rcpp::as<Rcpp::NumericMatrix>(control["initialpop"]);
	double f_weight      = Rcpp::as<double>(control["F"]);  	// stepsize 
	double f_cross       = Rcpp::as<double>(control["CR"]);  	// crossover probability 
	int i_bs_flag        = Rcpp::as<int>(control["bs"]);   		// Best of parent and child 
	int i_trace          = Rcpp::as<int>(control["trace"]);  	// Print progress? 
	int i_check_winner   = Rcpp::as<int>(control["checkWinner"]); 	// Re-evaluate best parameter vector? 
	int i_av_winner      = Rcpp::as<int>(control["avWinner"]);  	// Average 
	double i_pPct        = Rcpp::as<double>(control["p"]); 		// p to define the top 100p% best solutions 

	arma::colvec minbound(f_lower.begin(), f_lower.size(), false); 	// convert Rcpp vectors to arma vectors
	arma::colvec maxbound(f_upper.begin(), f_upper.size(), false);
	arma::mat initpopm(initialpopm.begin(), initialpopm.rows(), initialpopm.cols(), false);

	arma::mat ta_popP(i_D, i_NP*2);    				// Data structures for parameter vectors 
	arma::mat ta_oldP(i_D, i_NP);
	arma::mat ta_newP(i_D, i_NP);
	arma::colvec t_bestP(i_D); 

	arma::colvec ta_popC(i_NP*2);  				    	// Data structures for obj. fun. values 
	arma::colvec ta_oldC(i_NP);
	arma::colvec ta_newC(i_NP);
	double t_bestC; 

	arma::colvec t_bestitP(i_D);
	arma::colvec t_tmpP(i_D); 

	int i_nstorepop = ceil((i_itermax - i_storepopfrom) / i_storepopfreq);
	arma::mat d_pop(i_D, i_NP); 
	Rcpp::List d_storepop(i_nstorepop);
	arma::mat d_bestmemit(i_D, i_itermax);       
	arma::colvec d_bestvalit(i_itermax); 	 
	int i_iter = 0;

	// call actual Differential Evolution optimization given the parameters
	devol(VTR, f_weight, f_cross, i_bs_flag, minbound, maxbound, fnS, rhoS, i_trace, i_strategy, i_D, i_NP, 
	      i_itermax, initpopm, i_storepopfrom, i_storepopfreq, i_specinitialpop, i_check_winner, i_av_winner,
	      ta_popP, ta_oldP, ta_newP, t_bestP, ta_popC, ta_oldC, ta_newC, t_bestC, t_bestitP, t_tmpP,
	      d_pop, d_storepop, d_bestmemit, d_bestvalit, i_iter, i_pPct, l_nfeval);

	return Rcpp::List::create(Rcpp::Named("bestmem")   = t_bestP,	// and return a named list with results to R
				  Rcpp::Named("bestval")   = t_bestC,
				  Rcpp::Named("nfeval")    = l_nfeval,
				  Rcpp::Named("iter")      = i_iter,
				  Rcpp::Named("bestmemit") = trans(d_bestmemit),
				  Rcpp::Named("bestvalit") = d_bestvalit,
				  Rcpp::Named("pop")       = trans(d_pop),
				  Rcpp::Named("storepop")  = d_storepop); 

    } catch( std::exception& ex) { 
	forward_exception_to_r(ex); 
    } catch(...) { 
	::Rf_error( "c++ exception (unknown reason)"); 
    }
    return R_NilValue;
}
Esempio n. 16
0
	IndepTestGauss(uint sampleSize, Rcpp::NumericMatrix& cor) :
		_sampleSize(sampleSize),
		_correlation(cor.begin(), cor.nrow(), cor.ncol(), false) {}
Esempio n. 17
0
//' rcpp_get_polygons
//'
//' Extracts all polygons from an overpass API query
//'
//' @param st Text contents of an overpass API query
//' @return A \code{SpatialLinesDataFrame} contains all polygons and associated data
// [[Rcpp::export]]
Rcpp::S4 rcpp_get_polygons (const std::string& st)
{
#ifdef DUMP_INPUT
    {
        std::ofstream dump ("./get-polygons.xml");
        if (dump.is_open())
        {
            dump.write (st.c_str(), st.size());
        }
    }
#endif

    XmlPolys xml (st);

    const std::map <osmid_t, Node>& nodes = xml.nodes ();
    const std::map <osmid_t, OneWay>& ways = xml.ways ();
    const std::vector <Relation>& rels = xml.relations ();

    int count = 0;
    float xmin = FLOAT_MAX, xmax = -FLOAT_MAX,
          ymin = FLOAT_MAX, ymax = -FLOAT_MAX;
    std::vector <float> lons, lats;
    std::unordered_set <std::string> idset; // see TODO below
    std::vector <std::string> colnames, rownames, polynames;
    std::set <std::string> varnames;
    Rcpp::List dimnames (0), dummy_list (0);
    Rcpp::NumericMatrix nmat (Rcpp::Dimension (0, 0));

    idset.clear ();

    colnames.push_back ("lon");
    colnames.push_back ("lat");
    varnames.insert ("name");
    // other varnames added below

    /*
     * NOTE: Nodes are first loaded into the 2 vectors of (lon, lat), and these
     * are then copied into nmat. This intermediate can be avoided by loading
     * directly into nmat using direct indexing rather than iterators, however
     * this does *NOT* make the routine any faster, and so the current version
     * which more safely uses iterators is kept instead.
     */

    Rcpp::Environment sp_env = Rcpp::Environment::namespace_env ("sp");
    Rcpp::Function Polygon = sp_env ["Polygon"];
    Rcpp::Language polygons_call ("new", "Polygons");
    Rcpp::S4 polygons;

    /*
     * Polygons are extracted from the XmlPolys class in three setps:
     *  1. Get the names of all polygons that are part of multipolygon relations
     *  2. Get the names of any remaining ways that are polygonal (start == end)
     *  3. From the resultant list, extract the actual polygonal ways
     *
     * NOTE: OSM polygons are stored as ways, and thus all objects in the class
     * xmlPolys are rightly referred to as ways. Here within this Rcpp function,
     * these are referred to as Polygons, but the iteration is over the actual
     * polygonal ways.
     */

    // Step#1
    std::set <osmid_t> the_ways; // Set will only insert unique values
    for (auto it = rels.begin (); it != rels.end (); ++it)
        for (auto itw = (*it).ways.begin (); itw != (*it).ways.end (); ++itw)
        {
            assert (ways.find (itw->first) != ways.end ());
            the_ways.insert (itw->first);
        }

    // Step#2
    //const std::map <osmid_t, OneWay>& ways = xml.ways ();
    for (auto it = ways.begin (); it != ways.end (); ++it)
    {
        if (the_ways.find ((*it).first) == the_ways.end ())
            if ((*it).second.nodes.begin () == (*it).second.nodes.end ())
                the_ways.insert ((*it).first);
    }
    // Step#2b - Erase any ways that contain no data (should not happen).
    for (auto it = the_ways.begin (); it != the_ways.end (); )
    {
        auto itw = ways.find (*it);
        if (itw->second.nodes.size () == 0)
            it = the_ways.erase (it);
        else
            ++it;
    }
    Rcpp::List polyList (the_ways.size ());

    // Step#3 - Extract and store the_ways
    for (auto it = the_ways.begin (); it != the_ways.end (); ++it)
    {
        auto itw = ways.find (*it);
        // Collect all unique keys
        std::for_each (itw->second.key_val.begin (),
                itw->second.key_val.end (),
                [&](const std::pair <std::string, std::string>& p)
                {
                    varnames.insert (p.first);
                });

        /*
         * The following lines check for duplicate way IDs -- which do very
         * occasionally occur -- and ensures unique values as required by 'sp'
         * through appending decimal digits to <osmid_t> OSM IDs.
         */
        std::string id = std::to_string (itw->first);
        int tempi = 0;
        while (idset.find (id) != idset.end ())
            id = std::to_string (itw->first) + "." + std::to_string (tempi++);
        idset.insert (id);
        polynames.push_back (id);

        // Then iterate over nodes of that way and store all lat-lons
        size_t n = itw->second.nodes.size ();
        lons.clear ();
        lats.clear ();
        rownames.clear ();
        lons.reserve (n);
        lats.reserve (n);
        rownames.reserve (n);
        for (auto itn = itw->second.nodes.begin ();
                itn != itw->second.nodes.end (); ++itn)
        {
            // TODO: Propoer exception handler
            assert (nodes.find (*itn) != nodes.end ());
            lons.push_back (nodes.find (*itn)->second.lon);
            lats.push_back (nodes.find (*itn)->second.lat);
            rownames.push_back (std::to_string (*itn));
        }

        xmin = std::min (xmin, *std::min_element (lons.begin(), lons.end()));
        xmax = std::max (xmax, *std::max_element (lons.begin(), lons.end()));
        ymin = std::min (ymin, *std::min_element (lats.begin(), lats.end()));
        ymax = std::max (ymax, *std::max_element (lats.begin(), lats.end()));

        nmat = Rcpp::NumericMatrix (Rcpp::Dimension (lons.size (), 2));
        std::copy (lons.begin (), lons.end (), nmat.begin ());
        std::copy (lats.begin (), lats.end (), nmat.begin () + lons.size ());

        // This only works with push_back, not with direct re-allocation
        dimnames.push_back (rownames);
        dimnames.push_back (colnames);
        nmat.attr ("dimnames") = dimnames;
        dimnames.erase (0, dimnames.size());

        //Rcpp::S4 poly = Rcpp::Language ("Polygon", nmat).eval ();
        Rcpp::S4 poly = Polygon (nmat);
        dummy_list.push_back (poly);
        polygons = polygons_call.eval ();
        polygons.slot ("Polygons") = dummy_list;
        polygons.slot ("ID") = id;
        polyList [count++] = polygons;

        dummy_list.erase (0);
    } // end for it over the_ways
    polyList.attr ("names") = polynames;

    // Store all key-val pairs in one massive DF
    int nrow = the_ways.size (), ncol = varnames.size ();
    Rcpp::CharacterVector kv_vec (nrow * ncol, Rcpp::CharacterVector::get_na ());
    int namecoli = std::distance (varnames.begin (), varnames.find ("name"));
    for (auto it = the_ways.begin (); it != the_ways.end (); ++it)
    {
        int rowi = std::distance (the_ways.begin (), it);
        auto itw = ways.find (*it);
        kv_vec (namecoli * nrow + rowi) = itw->second.name;
        for (auto kv_iter = itw->second.key_val.begin ();
                kv_iter != itw->second.key_val.end (); ++kv_iter)
        {
            const std::string& key = (*kv_iter).first;
            auto ni = varnames.find (key); // key must exist in varnames!
            int coli = std::distance (varnames.begin (), ni);
            kv_vec (coli * nrow + rowi) = (*kv_iter).second;
        }
    }

    Rcpp::Language sp_polys_call ("new", "SpatialPolygonsDataFrame");
    Rcpp::S4 sp_polys = sp_polys_call.eval ();
    sp_polys.slot ("polygons") = polyList;

    sp_polys.slot ("bbox") = rcpp_get_bbox (xmin, xmax, ymin, ymax);

    Rcpp::Language crs_call ("new", "CRS");
    Rcpp::S4 crs = crs_call.eval ();
    crs.slot ("projargs") = "+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs +towgs84=0,0,0";
    sp_polys.slot ("proj4string") = crs;

    Rcpp::CharacterMatrix kv_mat (nrow, ncol, kv_vec.begin());
    Rcpp::DataFrame kv_df = kv_mat;
    kv_df.attr ("names") = varnames;
    sp_polys.slot ("data") = kv_df;

    return sp_polys;
}
Esempio n. 18
0
///' Calculate the network properties 
///' 
///' @details
///' \subsection{Input expectations:}{
///'   Note that this function expects all inputs to be sensible, as checked by
///'   the R function 'checkUserInput' and processed by 'networkProperties'. 
///'   
///'   These requirements are:
///'   \itemize{
///'   \item{The ordering of node names across 'data' and 'net' is consistent.}
///'   \item{The columns of 'data' are the nodes.}
///'   \item{'net' is a square matrix, and its rownames are identical to its 
///'         column names.}
///'   \item{'moduleAssigments' is a named character vector, where the names
///'         represent node labels found in the discovery dataset. Unlike 
///'         'PermutationProcedure', these may include nodes that are not 
///'         present in 'data' and 'net'.}
///'   \item{The module labels specified in 'modules' must occur in 
///'         'moduleAssignments'.}
///'   }
///' }
///' 
///' @param data data matrix from the dataset in which to calculate the network
///'   properties.
///' @param net adjacency matrix of network edge weights between all pairs of 
///'   nodes in the dataset in which to calculate the network properties.
///' @param moduleAssignments a named character vector containing the module 
///'   each node belongs to in the discovery dataset. 
///' @param modules a character vector of modules for which to calculate the 
///'   network properties for.
///' 
///' @return a list containing the summary profile, node contribution, module
///'   coherence, weighted degree, and average edge weight for each 'module'.
///'   
///' @keywords internal
// [[Rcpp::export]]
Rcpp::List NetProps (
    Rcpp::NumericMatrix data, Rcpp::NumericMatrix net, 
    Rcpp::CharacterVector moduleAssignments,
    Rcpp::CharacterVector modules
) {
  // First, scale the matrix data
  unsigned int nSamples = data.nrow();
  unsigned int nNodes = data.ncol();
  arma::mat scaledData = Scale(data.begin(), nSamples, nNodes);
  
  R_CheckUserInterrupt(); 
  
  // convert the colnames / rownames to C++ equivalents
  const std::vector<std::string> nodeNames (Rcpp::as<std::vector<std::string>>(colnames(net)));
  const std::vector<std::string> sampleNames (Rcpp::as<std::vector<std::string>>(rownames(data)));
  
  /* Next, we need to create two mappings:
  *  - From node IDs to indices in the dataset of interest
  *  - From modules to node IDs
  *  - From modules to only node IDs present in the dataset of interest
  */
  const namemap nodeIdxMap = MakeIdxMap(nodeNames);
  const stringmap modNodeMap = MakeModMap(moduleAssignments);
  const stringmap modNodePresentMap = MakeModMap(moduleAssignments, nodeIdxMap);
  
  // What modules do we actually want to analyse?
  const std::vector<std::string> mods (Rcpp::as<std::vector<std::string>>(modules));
  
  R_CheckUserInterrupt(); 
  
  // Calculate the network properties for each module
  std::string mod; // iterators
  unsigned int mNodesPresent, mNodes;
  arma::uvec nodeIdx, propIdx, nodeRank;
  namemap propIdxMap;
  std::vector<std::string> modNodeNames; 
  arma::vec WD, SP, NC; // results containers
  double avgWeight, coherence; 
  Rcpp::NumericVector degree, summary, contribution; // for casting to R equivalents
  Rcpp::List results; // final storage container
  for (auto mi = mods.begin(); mi != mods.end(); ++mi) {
    // What nodes are in this module?
    mod = *mi;
    modNodeNames = GetModNodeNames(mod, modNodeMap);
    
    // initialise results containers with NA values for nodes not present in
    // the dataset we're calculating the network properties in.
    degree = Rcpp::NumericVector(modNodeNames.size(), NA_REAL);
    contribution = Rcpp::NumericVector(modNodeNames.size(), NA_REAL);
    summary = Rcpp::NumericVector(nSamples, NA_REAL);
    avgWeight = NA_REAL;
    coherence = NA_REAL;
    degree.names() = modNodeNames;
    contribution.names() = modNodeNames;
    
    // Create a mapping between node names and the result vectors
    propIdxMap = MakeIdxMap(modNodeNames);
    
    // Get just the indices of nodes that are present in the requested dataset
    nodeIdx = GetNodeIdx(mod, modNodePresentMap, nodeIdxMap);
    mNodesPresent = nodeIdx.n_elem;
    
    // And a mapping of those nodes to the initialised vectors
    propIdx = GetNodeIdx(mod, modNodePresentMap, propIdxMap);
    mNodes = propIdx.n_elem;
    
    // Calculate the properties if the module has nodes in the test dataset
    if (nodeIdx.n_elem > 0) {
      // sort the node indices for sequential memory access
      nodeRank = SortNodes(nodeIdx.memptr(), mNodesPresent);
      
      WD = WeightedDegree(net.begin(), nNodes, nodeIdx.memptr(), mNodesPresent);
      WD = WD(nodeRank); // reorder results
      
      avgWeight = AverageEdgeWeight(WD.memptr(), WD.n_elem);
      R_CheckUserInterrupt(); 
      
      SP = SummaryProfile(scaledData.memptr(), nSamples, nNodes, 
                          nodeIdx.memptr(), mNodesPresent);
      R_CheckUserInterrupt(); 
      
      NC = NodeContribution(scaledData.memptr(), nSamples, nNodes, 
                            nodeIdx.memptr(), mNodesPresent, SP.memptr());
      NC = NC(nodeRank); // reorder results
      
      coherence = ModuleCoherence(NC.memptr(), mNodesPresent);
      R_CheckUserInterrupt();
      
      // Convert NaNs to NAs
      SP.elem(arma::find_nonfinite(SP)).fill(NA_REAL);
      NC.elem(arma::find_nonfinite(NC)).fill(NA_REAL);
      if (!arma::is_finite(coherence)) {
        coherence = NA_REAL;
      }

      // Fill results vectors
      Fill(degree, WD.memptr(), mNodesPresent, propIdx.memptr(), mNodes);
      Fill(contribution, NC.memptr(), mNodesPresent, propIdx.memptr(), mNodes);
      summary = Rcpp::NumericVector(SP.begin(), SP.end());
    }
    summary.names() = sampleNames;
    
    results.push_back(
      Rcpp::List::create(
        Rcpp::Named("summary") = summary, 
        Rcpp::Named("contribution") = contribution, 
        Rcpp::Named("coherence") = coherence, 
        Rcpp::Named("degree") = degree,
        Rcpp::Named("avgWeight") = avgWeight
      )
    );
  }
  results.names() = mods;

  return(results);
}
Esempio n. 19
0
// [[Rcpp::export]]
Rcpp::List DEoptim_impl(const arma::colvec & minbound,                  // user-defined lower bounds
                        const arma::colvec & maxbound,                  // user-defined upper bounds
                        SEXP fnS,                                       // function to be optimized, either R or C++
                        const Rcpp::List & control,                     // parameters 
                        SEXP rhoS) {                                    // optional environment
    
    double VTR           = Rcpp::as<double>(control["VTR"]);            // value to reach
    int i_strategy       = Rcpp::as<int>(control["strategy"]);          // chooses DE-strategy
    int i_itermax        = Rcpp::as<int>(control["itermax"]);           // Maximum number of generations
    long l_nfeval        = 0;                                           // nb of function evals (NOT passed in)
    int i_D              = Rcpp::as<int>(control["npar"]);              // Dimension of parameter vector
    int i_NP             = Rcpp::as<int>(control["NP"]);                // Number of population members
    int i_storepopfrom   = Rcpp::as<int>(control["storepopfrom"]) - 1;  // When to start storing populations 
    int i_storepopfreq   = Rcpp::as<int>(control["storepopfreq"]);      // How often to store populations 
    int i_specinitialpop = Rcpp::as<int>(control["specinitialpop"]);    // User-defined inital population 
    double f_weight      = Rcpp::as<double>(control["F"]);              // stepsize 
    double f_cross       = Rcpp::as<double>(control["CR"]);             // crossover probability 
    int i_bs_flag        = Rcpp::as<int>(control["bs"]);                // Best of parent and child 
    int i_trace          = Rcpp::as<int>(control["trace"]);             // Print progress? 
    double i_pPct        = Rcpp::as<double>(control["p"]);              // p to define the top 100p% best solutions 
    double d_c           = Rcpp::as<double>(control["c"]);              // c as a trigger of the JADE algorithm
    double d_reltol      = Rcpp::as<double>(control["reltol"]);         // tolerance for relative convergence test, default to be sqrt(.Machine$double.eps)
    int i_steptol        = Rcpp::as<double>(control["steptol"]);        // maximum of iteration after relative convergence test is passed, default to be itermax

    // as above, doing it in two steps is faster
    Rcpp::NumericMatrix initialpopm = Rcpp::as<Rcpp::NumericMatrix>(control["initialpop"]);
    arma::mat initpopm(initialpopm.begin(), initialpopm.rows(), initialpopm.cols(), false);

    arma::mat ta_popP(i_D, i_NP*2);                                     // Data structures for parameter vectors 
    arma::mat ta_oldP(i_D, i_NP);
    arma::mat ta_newP(i_D, i_NP);
    arma::colvec t_bestP(i_D); 

    arma::colvec ta_popC(i_NP*2);                                       // Data structures for obj. fun. values 
    arma::colvec ta_oldC(i_NP);
    arma::colvec ta_newC(i_NP);
    double t_bestC; 

    arma::colvec t_bestitP(i_D);
    arma::colvec t_tmpP(i_D); 

    int i_nstorepop = static_cast<int>(ceil(static_cast<double>((i_itermax - i_storepopfrom) / i_storepopfreq)));
    arma::mat d_pop(i_D, i_NP); 
    Rcpp::List d_storepop(i_nstorepop);
    arma::mat d_bestmemit(i_D, i_itermax);       
    arma::colvec d_bestvalit(i_itermax);     
    int i_iter = 0;

    // call actual Differential Evolution optimization given the parameters
    devol(VTR, f_weight, f_cross, i_bs_flag, minbound, maxbound, fnS, rhoS, i_trace, i_strategy, i_D, i_NP, 
          i_itermax, initpopm, i_storepopfrom, i_storepopfreq, i_specinitialpop,
          ta_popP, ta_oldP, ta_newP, t_bestP, ta_popC, ta_oldC, ta_newC, t_bestC, t_bestitP, t_tmpP,
          d_pop, d_storepop, d_bestmemit, d_bestvalit, i_iter, i_pPct, d_c, l_nfeval,
          d_reltol, i_steptol);

    return Rcpp::List::create(Rcpp::Named("bestmem")   = t_bestP,   // and return a named list with results to R
                              Rcpp::Named("bestval")   = t_bestC,
                              Rcpp::Named("nfeval")    = l_nfeval,
                              Rcpp::Named("iter")      = i_iter,
                              Rcpp::Named("bestmemit") = trans(d_bestmemit),
                              Rcpp::Named("bestvalit") = d_bestvalit,
                              Rcpp::Named("pop")       = trans(d_pop),
                              Rcpp::Named("storepop")  = d_storepop); 

}
Esempio n. 20
0
// [[Rcpp::export]]
SEXP EMcppARMSIQuZhuo (Rcpp::NumericVector y_rcpp,
                  Rcpp::NumericMatrix y_lagged_rcpp,
                  Rcpp::NumericMatrix z_dependent_rcpp,
                  Rcpp::NumericMatrix z_independent_rcpp,
                  Rcpp::NumericMatrix beta0_rcpp,
                  Rcpp::NumericVector mu0_rcpp,
                  Rcpp::NumericVector sigma0_rcpp,
                  Rcpp::NumericMatrix gamma_dependent0_rcpp,
                  Rcpp::NumericMatrix gamma_independent0_rcpp,
                  Rcpp::NumericMatrix transition_probs0_rcpp,
                  Rcpp::NumericVector initial_dist0_rcpp,
                  int maxit,
                  double epsilon,
                  double transition_probs_min,
                  double transition_probs_max,
                  double sigma_min)
{
  // conversion first
  arma::colvec y(y_rcpp.begin(), y_rcpp.size(), false);
  arma::mat    y_lagged(y_lagged_rcpp.begin(),
                y_lagged_rcpp.nrow(), y_lagged_rcpp.ncol(), false);
  arma::mat    z_dependent(z_dependent_rcpp.begin(),
                z_dependent_rcpp.nrow(), z_dependent_rcpp.ncol(), false);
  arma::mat    z_independent(z_independent_rcpp.begin(),
                z_independent_rcpp.nrow(), z_independent_rcpp.ncol(), false);
  arma::mat		 beta0(beta0_rcpp.begin(),
                beta0_rcpp.nrow(), beta0_rcpp.ncol(), false);
  arma::colvec mu0(mu0_rcpp.begin(), mu0_rcpp.size(), false);
  arma::colvec sigma0(sigma0_rcpp.begin(), sigma0_rcpp.size(), false);
  arma::mat    gamma_dependent0(gamma_dependent0_rcpp.begin(),
                 gamma_dependent0_rcpp.nrow(),
                 gamma_dependent0_rcpp.ncol(), false);
  arma::mat    gamma_independent0(gamma_independent0_rcpp.begin(),
                 gamma_independent0_rcpp.nrow(),
                 gamma_independent0_rcpp.ncol(), false);
  arma::mat    transition_probs0(transition_probs0_rcpp.begin(),
                transition_probs0_rcpp.nrow(),
                transition_probs0_rcpp.ncol(), false);
  arma::colvec initial_dist0(initial_dist0_rcpp.begin(),
                initial_dist0_rcpp.size(), false);
  arma::mat y_lagged_t = y_lagged.t();
  arma::mat z_dependent_t = z_dependent.t();
  arma::mat z_independent_t = z_independent.t();

  arma::colvec likelihoods(maxit, arma::fill::zeros);
  int index_exit = -1;

  Theta theta_original(beta0, mu0, sigma0,
             gamma_dependent0, gamma_independent0,
             transition_probs0, initial_dist0);
  Theta theta_updated = theta_original;
  Theta theta = theta_updated;
  theta.likelihood = -std::numeric_limits<double>::infinity();
  likelihoods(0) = -std::numeric_limits<double>::infinity();

  // 1. EM iterations
  for (int i = 1; i < maxit; i++)
  {
    index_exit++;
    Xi 		e_step = ExpectationStepARMSIQuZhuo(&y, &y_lagged, &z_dependent, &z_independent,
                          &theta_updated);

    likelihoods(i) = theta_updated.likelihood;

    // Stop if 1. the difference in likelihoods is small enough
    // or 2. likelihood decreases (a decrease is due to locating local max.
    // out of hard constraints in maximization step, which suggests that this
    // is not a good candidate anyway.)
    // Use negative condition to stop if 3. the updated likelihood is NaN.
    if (!((theta_updated.likelihood - theta.likelihood) >= epsilon))
      break;

    theta = theta_updated;
    theta_updated = MaximizationStepARMSIQuZhuo(&y, &y_lagged,
                                  &z_dependent, &z_independent,
                                  &y_lagged_t, &z_dependent_t, &z_independent_t,
                                  &theta,
                                  &(e_step.xi_k), &(e_step.xi_past_t),
                                  &(e_step.xi_n),
                                  transition_probs_min,
                                  transition_probs_max,
                                  sigma_min);
    // printf("sigma updated at %d: %f", i, theta_updated.sigma(0));
  }


  likelihoods = likelihoods.subvec(0, std::min((index_exit + 1), (maxit - 1))); // remove unused elements

  Rcpp::List theta_R = Rcpp::List::create(Named("beta") = wrap(theta.beta),
                            Named("mu") = wrap(theta.mu),
                            Named("sigma") = wrap(theta.sigma),
                            Named("gamma.dependent") =
                              wrap(theta.gamma_dependent),
                            Named("gamma.independent") =
                              wrap(theta.gamma_independent),
                            Named("transition.probs") =
                              wrap(theta.transition_probs),
                            Named("initial.dist") =
                              wrap(theta.initial_dist)
                            );

  return Rcpp::List::create(Named("theta") = wrap(theta_R),
                            Named("likelihood") = wrap(theta.likelihood),
                            Named("likelihoods") = wrap(likelihoods));
}
Esempio n. 21
0
///' Calculate the intermediate network properties in the discovery dataset
///' 
///' These properties are need at every permutation: so they will be computed 
///' once.
///' 
///' @details
///' \subsection{Input expectations:}{
///'   Note that this function expects all inputs to be sensible, as checked by
///'   the R function 'checkUserInput' and processed by 'modulePreservation'. 
///'   
///'   These requirements are:
///'   \itemize{
///'   \item{The ordering of node names across 'dData', 'dCorr', and 'dNet' is
///'         consistent.}
///'   \item{The columns of 'dData' are the nodes.}
///'   \item{'dData' has been scaled by 'Scale'.}
///'   \item{'dCorr' and 'dNet'  are square matrices, and their rownames are 
///'         identical to their column names.}
///'   \item{'moduleAssigments' is a named character vector, where the names
///'         represent node labels found in the discovery dataset (e.g. 'dNet').}
///'   }
///' }
///' 
///' @param dData scaled data matrix from the \emph{discovery} dataset.
///' @param dCorr matrix of correlation coefficients between all pairs of 
///'   variables/nodes in the \emph{discovery} dataset.
///' @param dNet adjacency matrix of network edge weights between all pairs of 
///'   nodes in the \emph{discovery} dataset.
///' @param tNodeNames a character vector of node names in the test dataset
///' @param moduleAssignments a named character vector containing the module 
///'   each node belongs to in the discovery dataset. 
///' @param modules a character vector of modules for which to calculate the 
///'   module preservation statistics.
///' 
///' @return a list containing three lists: a list of weighted degree vectors,
///'   a list of correlation coefficient vectors, and a list of node 
///'   contribution vectors. There is one vector for each module in each list.
///' 
///' @keywords internal
// [[Rcpp::export]]
Rcpp::List IntermediateProperties (
    Rcpp::NumericMatrix dData, Rcpp::NumericMatrix dCorr, Rcpp::NumericMatrix dNet,
    Rcpp::CharacterVector tNodeNames, Rcpp::CharacterVector moduleAssignments, 
    Rcpp::CharacterVector modules
) {
  // First, scale the matrix data
  unsigned int nSamples = dData.nrow();
  unsigned int nNodes = dData.ncol();

  R_CheckUserInterrupt(); 
  
  // convert the colnames / rownames to C++ equivalents
  const std::vector<std::string> dNames (Rcpp::as<std::vector<std::string>>(colnames(dNet)));
  const std::vector<std::string> tNames (Rcpp::as<std::vector<std::string>>(tNodeNames));
  
  /* Next, we need to create three mappings:
  *  - From node IDs to indices in the discovery dataset.
  *  - From modules to all node IDs.
  *  - From modules to just node IDs present in the test dataset.
  */
  const namemap dIdxMap = MakeIdxMap(dNames);
  const stringmap modNodeMap = MakeModMap(moduleAssignments);
  const namemap tIdxMap = MakeIdxMap(tNames);
  const stringmap modNodePresentMap = MakeModMap(moduleAssignments, tIdxMap);
  
  // What modules do we actually want to analyse?
  const std::vector<std::string> mods (Rcpp::as<std::vector<std::string>>(modules));
  
  // We only need to iterate through modules which have nodes in the test 
  // dataset
  std::vector<std::string> modsPresent;
  for (auto it = mods.begin(); it != mods.end(); ++it) {
    if (modNodePresentMap.count(*it) > 0) {
      modsPresent.push_back(*it);
    }
  }
  
  R_CheckUserInterrupt(); 
  
  Rcpp::List degree;
  Rcpp::List corr;
  Rcpp::List contribution;
  
  // Calculate the network properties in the discovery dataset.
  std::string mod;
  unsigned int mNodes;
  arma::uvec dIdx, dRank;
  arma::vec dSP, dWD, dCV, dNC; 
  for (auto mi = modsPresent.begin(); mi != modsPresent.end(); ++mi) {
    // Get the node indices in the discovery dataset for this module
    mod = *mi;
    dIdx = GetNodeIdx(mod, modNodePresentMap, dIdxMap);
    mNodes = dIdx.n_elem;
    R_CheckUserInterrupt(); 
    
    // Calculate the network properties and insert into their storage containers
    dCV = CorrVector(dCorr.begin(), nNodes, dIdx.memptr(), mNodes);
    R_CheckUserInterrupt(); 
    
    // Sort node indices for sequential memory access
    dRank = SortNodes(dIdx.memptr(), mNodes); 
    
    dWD = WeightedDegree(dNet.begin(), nNodes, dIdx.memptr(), mNodes);
    dWD = dWD(dRank); // reorder
    R_CheckUserInterrupt(); 
    
    dSP = SummaryProfile(dData.begin(), nSamples, nNodes, dIdx.memptr(), mNodes);
    R_CheckUserInterrupt(); 
    
    dNC = NodeContribution(dData.begin(), nSamples, nNodes, 
                           dIdx.memptr(), mNodes, dSP.memptr());
    dNC = dNC(dRank); // reorder results
    R_CheckUserInterrupt(); 
    
    // Cast to R-vectors and add to results lists
    corr.push_back(Rcpp::NumericVector(dCV.begin(), dCV.end()));
    degree.push_back(Rcpp::NumericVector(dWD.begin(), dWD.end()));
    contribution.push_back(Rcpp::NumericVector(dNC.begin(), dNC.end()));
  }
  degree.names() = modsPresent;
  corr.names() = modsPresent;
  contribution.names() = modsPresent;
  
  return Rcpp::List::create(
    Rcpp::Named("degree") = degree,
    Rcpp::Named("corr") = corr,
    Rcpp::Named("contribution") = contribution
  );
}
Esempio n. 22
0
// Returns an n by 1 column that represents the likelihoods of
// an estimated univariate MS-AR model for each k where 1 \leq k \leq n where
// beta is switching.
// Note that even if beta is non-switching, setting beta as a s by M matrix with
// repeated column of the original beta will give you the likelihood for
// MS-AR model with non-switching beta.
// TODO: Implement the version where z_dependent/z_independent exist(lagged variables should be implemented for MSM models)
// [[Rcpp::export]]
SEXP LikelihoodsMSMAR (Rcpp::NumericVector y_rcpp,
					Rcpp::NumericMatrix y_lagged_rcpp,
					Rcpp::NumericMatrix z_dependent_rcpp,
					Rcpp::NumericMatrix z_independent_rcpp,
					Rcpp::NumericMatrix z_dependent_lagged_rcpp,
					Rcpp::NumericMatrix z_independent_lagged_rcpp,
					Rcpp::NumericMatrix transition_probs_rcpp,
					Rcpp::NumericVector initial_dist_extended_rcpp,
					Rcpp::NumericMatrix beta_rcpp,
					Rcpp::NumericVector mu_rcpp,
					Rcpp::NumericVector sigma_rcpp,
					Rcpp::NumericMatrix gamma_dependent_rcpp,
					Rcpp::NumericVector gamma_independent_rcpp,
					Rcpp::IntegerMatrix state_conversion_mat_rcpp
					)
{
	int n = y_rcpp.size();

	arma::colvec y(y_rcpp.begin(), y_rcpp.size(), false);
	arma::mat    y_lagged(y_lagged_rcpp.begin(),
								y_lagged_rcpp.nrow(),
								y_lagged_rcpp.ncol(), false);
	arma::mat    z_dependent(z_dependent_rcpp.begin(),
								z_dependent_rcpp.nrow(),
								z_dependent_rcpp.ncol(), false);
	arma::mat    z_dependent_lagged(z_dependent_lagged_rcpp.begin(),
								z_dependent_lagged_rcpp.nrow(),
								z_dependent_lagged_rcpp.ncol(), false);
	arma::mat    z_independent(z_independent_rcpp.begin(),
								z_independent_rcpp.nrow(),
								z_independent_rcpp.ncol(), false);
	arma::mat    z_independent_lagged(z_independent_lagged_rcpp.begin(),
								z_independent_lagged_rcpp.nrow(),
								z_independent_lagged_rcpp.ncol(), false);
	arma::mat    transition_probs(transition_probs_rcpp.begin(),
								transition_probs_rcpp.nrow(),
								transition_probs_rcpp.ncol(), false);
	arma::colvec initial_dist_extended(initial_dist_extended_rcpp.begin(),
								initial_dist_extended_rcpp.size(), false);
	arma::mat    beta(beta_rcpp.begin(),
                   beta_rcpp.nrow(), beta_rcpp.ncol(), false);
	arma::colvec mu(mu_rcpp.begin(), mu_rcpp.size(), false);
	arma::colvec sigma(sigma_rcpp.begin(), sigma_rcpp.size(), false);
	arma::mat    gamma_dependent(gamma_dependent_rcpp.begin(),
								gamma_dependent_rcpp.nrow(),
								gamma_dependent_rcpp.ncol(), false);
	arma::colvec gamma_independent(gamma_independent_rcpp.begin(),
								gamma_independent_rcpp.size(), false);
	arma::imat    state_conversion_mat(state_conversion_mat_rcpp.begin(),
								state_conversion_mat_rcpp.nrow(),
								state_conversion_mat_rcpp.ncol(), false);

	arma::mat transition_probs_extended = GetExtendedTransitionProbs(
                              transition_probs, state_conversion_mat);
  arma::mat transition_probs_extended_t = transition_probs_extended.t();
  arma::colvec likelihoods(n, arma::fill::zeros);

	int M_extended = transition_probs_extended_t.n_rows;
	int M = gamma_dependent.n_cols;
	int s = beta.n_rows;
	int M_extended_block = IntPower(M, s);
	arma::mat* xi_k_t = new arma::mat(M_extended, n, arma::fill::zeros); // make a transpose first for col operations.

	// partition blocks
	int p = gamma_dependent.n_rows;
	int q = gamma_independent.size();
	arma::mat* z_dependent_lagged_blocks = new arma::mat[s];
	arma::mat* z_independent_lagged_blocks = new arma::mat[s];
	for (int i = 0; i < s; i++)
	{
		int z_dependent_block_first = i * p;
		int z_independent_block_first = i * q;
		z_dependent_lagged_blocks[i] = z_dependent_lagged.cols(z_dependent_block_first,
			z_dependent_block_first + p - 1);
		z_independent_lagged_blocks[i] = z_independent_lagged.cols(z_independent_block_first,
			z_independent_block_first + q - 1);
	}

	for (int k = 0; k < n; k++)
	{
		// initial setting; keep track of minimum value and its index
		// to divide everything by the min. value in order to prevent
		// possible numerical errors when computing posterior probs.
		int min_index = -1;
		double min_value = std::numeric_limits<double>::infinity();
		double* ratios = new double[M_extended];
		double row_sum = 0;

		arma::colvec xi_past;
		if (k > 0)
			xi_past = transition_probs_extended_t * exp(xi_k_t->col(k-1));
		else
			xi_past = initial_dist_extended;
    xi_past /= arma::sum(xi_past);

		for (int j_M = 0; j_M < M; j_M++)
		{
			for (int j_extra = 0; j_extra < M_extended_block; j_extra++)
			{
				int j = j_M * M_extended_block + j_extra;
				arma::colvec xi_k_t_jk = y.row(k);
				// arma::colvec xi_k_t_jk = y.row(k) -
		    //   z_dependent.row(k) * gamma_dependent.col(j_M) -
		    //   z_independent.row(k) * gamma_independent - mu(j_M);
				for (int lag = 0; lag < s; lag++)
				{
					int lagged_index = state_conversion_mat.at((lag + 1), j);
					xi_k_t_jk -= beta.at(lag, j_M) *
												(y_lagged.at(k, lag) -
												mu(lagged_index));
					xi_k_t_jk -= beta.at(lag, j_M) *
												(y_lagged.at(k, lag) -
												z_dependent_lagged_blocks[lag].row(k) * gamma_dependent.col(lagged_index) -
												z_independent_lagged_blocks[lag].row(k) * gamma_independent -
												mu(lagged_index));
				}
				xi_k_t->at(j,k) = xi_k_t_jk(0); // explicit gluing
		    xi_k_t->at(j,k) *= xi_k_t->at(j,k);
				xi_k_t->at(j,k) = xi_k_t->at(j,k) / (2 * (sigma(j_M) * sigma(j_M)));

				if (min_value > xi_k_t->at(j,k))
				{
					min_value = xi_k_t->at(j,k);
					min_index = j;
				}
				// SQRT2PI only matters in calculation of eta;
				// you can remove it in the final log-likelihood.
				ratios[j] = xi_past(j) / sigma(j_M);

			}
		}

		for (int j = 0; j < M_extended; j++)
		{
			if (j == min_index)
				row_sum += 1.0;
			else
				row_sum += (ratios[j] / ratios[min_index]) *
											exp(min_value - xi_k_t->at(j,k));
			xi_k_t->at(j,k) += log(ratios[j]);
		}

		likelihoods(k) = log(row_sum) - min_value + log(ratios[min_index]) - LOG2PI_OVERTWO;

		delete[] ratios; // clear memory
	}
	arma::exp(xi_k_t->cols(1,4)).t().print();
	// clear memory for blocks
	delete[] z_dependent_lagged_blocks;
	delete[] z_independent_lagged_blocks;
	delete xi_k_t;

	return (wrap(likelihoods));
}