コード例 #1
0
ファイル: bellman.cpp プロジェクト: cran/rcss
// Bellman recursion using row rearrangement
//[[Rcpp::export]]
Rcpp::List Bellman(const arma::mat& grid,
                   Rcpp::NumericVector reward_,
                   const arma::cube& scrap,
                   Rcpp::NumericVector control_,
                   const arma::cube& disturb,
                   const arma::vec& weight) {
  // Passing R objects to C++
  const std::size_t n_grid = grid.n_rows;
  const std::size_t n_dim = grid.n_cols;
  const arma::ivec r_dims = reward_.attr("dim");
  const std::size_t n_pos = r_dims(3);
  const std::size_t n_action = r_dims(2);
  const std::size_t n_dec = r_dims(4) + 1;
  const arma::cube
      reward(reward_.begin(), n_grid, n_dim * n_action * n_pos, n_dec - 1, false);
  const arma::ivec c_dims = control_.attr("dim");
  arma::cube control2;
  arma::imat control;
  bool full_control;
  if (c_dims.n_elem == 3) {
    full_control = false;
    arma::cube temp_control2(control_.begin(), n_pos, n_action, n_pos, false);
    control2 = temp_control2;
  } else {
    full_control = true;
    arma::mat temp_control(control_.begin(), n_pos, n_action, false);
    control = arma::conv_to<arma::imat>::from(temp_control);
  }
  const std::size_t n_disturb = disturb.n_slices;
  // Bellman recursion
  arma::cube value(n_grid, n_dim * n_pos, n_dec);
  arma::cube cont(n_grid, n_dim * n_pos, n_dec - 1, arma::fill::zeros);
  arma::mat d_value(n_grid, n_dim);
  Rcpp::Rcout << "At dec: " << n_dec - 1 << "...";
  for (std::size_t pp = 0; pp < n_pos; pp++) {
    value.slice(n_dec - 1).cols(n_dim * pp, n_dim * (pp + 1) - 1) =
        scrap.slice(pp);
  }
  for (int tt = (n_dec - 2); tt >= 0; tt--) {
    Rcpp::Rcout << tt;
    // Approximating the continuation value
    for (std::size_t pp = 0; pp < n_pos; pp++) {
      cont.slice(tt).cols(n_dim * pp, n_dim * (pp + 1) - 1) =
          Expected(grid,
                   value.slice(tt + 1).cols(pp * n_dim, n_dim * (pp + 1) - 1),
                   disturb, weight);
    }
    Rcpp::Rcout << "..";
    // Optimise value function
    if (full_control) {
      BellmanOptimal(grid, control, value, reward, cont, tt);
    } else {
      BellmanOptimal2(grid, control2, value, reward, cont, tt);
    }
    Rcpp::Rcout << ".";
  }
  return Rcpp::List::create(Rcpp::Named("value") = value,
                            Rcpp::Named("expected") = cont);
}
コード例 #2
0
ファイル: add_dual_bounds.cpp プロジェクト: YeeJeremy/rcss
//[[Rcpp::export]]
Rcpp::List AddDualBounds(const arma::cube& path,
                         Rcpp::NumericVector control_,
                         Rcpp::Function Reward_,
                         Rcpp::Function Scrap_,
                         const arma::cube& mart,
                         const arma::ucube& path_action) {
  // Extract parameters
  const std::size_t n_dec = path.n_slices;
  const std::size_t n_path = path.n_rows;
  const std::size_t n_dim = path.n_cols;
  const arma::ivec c_dims = control_.attr("dim");
  const std::size_t n_pos = c_dims(0);
  const std::size_t n_action = c_dims(1);
  arma::imat control;  // full control
  arma::cube control2;  // partial control
  bool full_control;
  if (c_dims.n_elem == 3) {
    full_control = false;
    arma::cube temp_control2(control_.begin(), n_pos, n_action, n_pos, false);
    control2 = temp_control2;
  } else {
    full_control = true;
    arma::mat temp_control(control_.begin(), n_pos, n_action, false);
    control = arma::conv_to<arma::imat>::from(temp_control);
  }
  // Initialise with scrap value
  arma::cube primals(n_path, n_pos, n_dec);
  primals.slice(n_dec - 1) = Rcpp::as<arma::mat>(
      Scrap_(Rcpp::as<Rcpp::NumericMatrix>(Rcpp::wrap(path.slice(n_dec - 1)))));
  arma::cube duals = primals;
  // Perform the backward induction
  arma::uword policy;
  arma::cube reward(n_path, n_action, n_pos);
  if (full_control) {  // For the full control case
    arma::uword next;
    for (int tt = (n_dec - 2); tt >= 0; tt--) {
      reward = Rcpp::as<arma::cube>(Reward_(
          Rcpp::as<Rcpp::NumericMatrix>(Rcpp::wrap(path.slice(tt))), tt + 1));
#pragma omp parallel for private(policy, next)
      for (std::size_t ii = 0; ii < n_path; ii++) {
        for (std::size_t pp = 0; pp < n_pos; pp++) {
          // Primal values
          policy = path_action(ii, pp, tt) - 1;  // R to C indexing
          next = control(pp, policy) - 1;
          primals(ii, pp, tt) = reward(ii, policy, pp) + mart(ii, next, tt)
              + primals(ii, next, tt + 1);
          // Dual values
          next = control(pp, 0) - 1;
          duals(ii, pp, tt) = reward(ii, 0, pp) + mart(ii, next, tt)
              + duals(ii, next, tt + 1);
          for (std::size_t aa = 1; aa < n_action; aa++) {
            next = control(pp, aa) - 1;
            duals(ii, pp, tt) = std::max(reward(ii, aa, pp) + mart(ii, next, tt)
                         + duals(ii, next, tt + 1), duals(ii, pp, tt));
          }
        }
      }
    }
  } else {  // Positions evolve randomly
    arma::rowvec mod(n_pos);
    arma::rowvec prob_weight(n_pos);
    for (int tt = (n_dec - 2); tt >= 0; tt--) {
      reward = Rcpp::as<arma::cube>(Reward_(
          Rcpp::as<Rcpp::NumericMatrix>(Rcpp::wrap(path.slice(tt))), tt + 1));
#pragma omp parallel for private(policy, prob_weight, mod)
      for (std::size_t ii = 0; ii < n_path; ii++) {
        for (std::size_t pp = 0; pp < n_pos; pp++) {
          //  Primal values
          mod = primals.slice(tt + 1).row(ii) + mart.slice(tt).row(ii);
          policy = path_action(ii, pp, tt) - 1;
          prob_weight = control2.tube(pp, policy);
          primals(ii, pp, tt) =
              reward(ii, policy, pp) + arma::sum(prob_weight % mod);
          // Dual values
          mod = duals.slice(tt + 1).row(ii) + mart.slice(tt).row(ii);
          prob_weight = control2.tube(pp, 0);
          duals(ii, pp, tt) = reward(ii, 0, pp) + arma::sum(prob_weight % mod);
          for (std::size_t aa = 1; aa < n_action; aa++) {
            prob_weight = control2.tube(pp, aa);
            duals(ii, pp, tt) =
                std::max(reward(ii, aa, pp) + arma::sum(prob_weight % mod),
                         duals(ii, pp, tt));
          }
        }
      }
    }
  }
  return Rcpp::List::create(Rcpp::Named("primal") = primals,
                            Rcpp::Named("dual") = duals);
}