bool saveToDirectory(const vector<UpdateSummaryParallel>& update_summaries, std::string directory, bool overwrite, bool only_learning_curve) { // Save the learning curve int n_updates = update_summaries.size(); assert(n_updates>0); int n_parallel = update_summaries[0].distributions.size(); MatrixXd learning_curve(n_updates,2+n_parallel); learning_curve(0,0) = 0; // First evaluation is at 0 for (int i_update=0; i_update<n_updates; i_update++) { // Number of samples at which an evaluation was performed. if (i_update>0) { int n_samples = update_summaries[i_update].costs.rows(); learning_curve(i_update,0) = learning_curve(i_update-1,0) + n_samples; } // The cost of the evaluation at this update learning_curve(i_update,1) = update_summaries[i_update].cost_eval; for (int i_parallel=0; i_parallel<n_parallel; i_parallel++) { // The largest eigenvalue of the covariance matrix, for each distribution DistributionGaussian* distribution = update_summaries[i_update].distributions[i_parallel]; MatrixXd eigen_values = distribution->covar().eigenvalues().real(); learning_curve(i_update,2+i_parallel) = sqrt(eigen_values.maxCoeff()); } } if (!saveMatrix(directory, "learning_curve.txt", learning_curve, overwrite)) return false; if (!only_learning_curve) { // Save all the information in the update summaries for (int i_update=0; i_update<n_updates; i_update++) { stringstream stream; stream << directory << "/update" << setw(5) << setfill('0') << i_update+1 << "/"; if (!saveToDirectory(update_summaries[i_update], stream.str(),overwrite)) return false; } } return true; }
/** \todo Get rid of runOptimizationParallelDeprecated(), and implement in UpdaterCovarAdapation */ void runOptimizationParallelDeprecated( Task* task, TaskSolver* task_solver, vector<DistributionGaussian*> initial_distributions, Updater* updater, int n_updates, int n_samples_per_update, string save_directory, bool overwrite, bool only_learning_curve) { // Some variables int n_parallel = initial_distributions.size(); assert(n_parallel>=2); int n_samples = n_samples_per_update; // Shorthand VectorXi offsets(n_parallel+1); offsets[0] = 0; for (int ii=0; ii<n_parallel; ii++) offsets[ii+1] = offsets[ii] + initial_distributions[ii]->mean().size(); int sum_n_dims = offsets[n_parallel]; // n_parallel X n_samples X n_dims // Note: n_samples must be the same for all, n_dims varies //vector<MatrixXd> sample(n_parallel); //for (int ii=0; ii<n_parallel; ii++) // // Pre-allocate memory just to be clear. // sample[ii] = MatrixXd(n_samples_per_update,initial_distributions[ii]->mean().size()); MatrixXd samples(n_samples,sum_n_dims); // Some variables VectorXd sample_eval(sum_n_dims); VectorXd cost_eval; MatrixXd cost_vars_eval; MatrixXd samples_per_parallel; MatrixXd cost_vars; VectorXd cur_costs; VectorXd costs(n_samples); VectorXd total_costs(n_samples); VectorXd weights; // Bookkeeping MatrixXd learning_curve(n_updates,3); vector<DistributionGaussian> distributions; vector<DistributionGaussian> distributions_new; for (int ii=0; ii<n_parallel; ii++) { distributions.push_back(*(initial_distributions[ii]->clone())); distributions_new.push_back(*(initial_distributions[ii]->clone())); } // Optimization loop for (int i_update=0; i_update<n_updates; i_update++) { // 0. Get cost of current distribution mean for (int pp=0; pp<n_parallel; pp++) sample_eval.segment(offsets[pp],offsets[pp+1]-offsets[pp]) = distributions[pp].mean().transpose(); task_solver->performRollout(sample_eval,cost_vars_eval); task->evaluateRollout(cost_vars_eval,sample_eval,cost_eval); Rollout* rollout_eval = new Rollout(sample_eval,cost_vars_eval,cost_eval); // 1. Sample from distribution for (int pp=0; pp<n_parallel; pp++) { distributions[pp].generateSamples(n_samples, samples_per_parallel); int width = offsets[pp+1]-offsets[pp]; samples.block(0,offsets[pp],n_samples,width) = samples_per_parallel; } vector<Rollout*> rollouts(n_samples_per_update); for (int i_sample=0; i_sample<n_samples_per_update; i_sample++) { // 2. Perform rollouts for the samples task_solver->performRollout(samples.row(i_sample), cost_vars); // 3. Evaluate the last batch of rollouts task->evaluateRollout(cost_vars,samples.row(i_sample),cur_costs); // Bookkeeping costs[i_sample] = cur_costs[0]; rollouts[i_sample] = new Rollout(samples.row(i_sample),cost_vars,cur_costs); } // 4. Update parameters for (int pp=0; pp<n_parallel; pp++) { int width = offsets[pp+1]-offsets[pp]; samples_per_parallel = samples.block(0,offsets[pp],n_samples,width); updater->updateDistribution(distributions[pp], samples_per_parallel, costs, weights, distributions_new[pp]); } // Some output and/or saving to file (if "directory" is set) if (save_directory.empty()) { cout << i_update+1 << " cost_eval=" << cost_eval << endl; } else { // Update learning curve // How many samples so far? learning_curve(i_update,0) = i_update*n_samples_per_update; // Cost of evaluation learning_curve(i_update,1) = cost_eval[0]; // Exploration magnitude learning_curve(i_update,2) = 0.0; for (int pp=0; pp<n_parallel; pp++) learning_curve(i_update,2) += sqrt(distributions[pp].maxEigenValue()); // Save more than just learning curve. if (!only_learning_curve) { saveToDirectory(save_directory,i_update,distributions,rollout_eval,rollouts,weights,distributions_new); if (i_update==0) task->savePlotRolloutScript(save_directory); } } // Distribution is new distribution for (int ii=0; ii<n_parallel; ii++) distributions[ii] = distributions_new[ii]; } }
void runOptimizationTask( const Task* const task, const TaskSolver* const task_solver, const DistributionGaussian* const initial_distribution, const Updater* const updater, int n_updates, int n_samples_per_update, std::string save_directory, bool overwrite, bool only_learning_curve) { int n_cost_components = task->getNumberOfCostComponents(); // Some variables VectorXd sample_eval; MatrixXd cost_vars_eval; VectorXd cost_eval(1+n_cost_components); MatrixXd samples; MatrixXd cost_vars; VectorXd weights; MatrixXd costs(n_samples_per_update,1+n_cost_components); // tmp variables VectorXd total_costs(n_samples_per_update); VectorXd cur_cost(1+n_cost_components); // Bookkeeping MatrixXd learning_curve(n_updates,2+n_cost_components); MatrixXd exploration_curve(n_updates,2); if (save_directory.empty()) cout << "init = " << " distribution=" << *initial_distribution; DistributionGaussian distribution = *(initial_distribution->clone()); DistributionGaussian distribution_new = *(initial_distribution->clone()); // Optimization loop for (int i_update=0; i_update<n_updates; i_update++) { // 0. Get cost of current distribution mean sample_eval = distribution.mean().transpose(); task_solver->performRollout(sample_eval,cost_vars_eval); task->evaluateRollout(cost_vars_eval,sample_eval,cost_eval); Rollout* rollout_eval = new Rollout(sample_eval,cost_vars_eval,cost_eval); // 1. Sample from distribution distribution.generateSamples(n_samples_per_update, samples); vector<Rollout*> rollouts(n_samples_per_update); for (int i_sample=0; i_sample<n_samples_per_update; i_sample++) { // 2A. Perform the rollout task_solver->performRollout(samples.row(i_sample),cost_vars); // 2B. Evaluate the rollout task->evaluateRollout(cost_vars,samples.row(i_sample),cur_cost); costs.row(i_sample) = cur_cost; rollouts[i_sample] = new Rollout(samples.row(i_sample),cost_vars,cur_cost); } // 3. Update parameters (first column of costs contains sum of cost components) total_costs = costs.col(0); updater->updateDistribution(distribution, samples, total_costs, weights, distribution_new); // Bookkeeping // Some output and/or saving to file (if "directory" is set) if (save_directory.empty()) { cout << "\t cost_eval=" << cost_eval << endl << i_update+1 << " " << distribution; } else { // Update learning curve // How many samples? int i_samples = i_update*n_samples_per_update; learning_curve(i_update,0) = i_samples; // Cost of evaluation learning_curve.block(i_update,1,1,1+n_cost_components) = cost_eval.transpose(); // Exploration magnitude exploration_curve(i_update,0) = i_samples; exploration_curve(i_update,1) = sqrt(distribution.maxEigenValue()); // Save more than just learning curve. if (!only_learning_curve) { saveToDirectory(save_directory,i_update,distribution,rollout_eval,rollouts,weights,distribution_new); if (i_update==0) task->savePlotRolloutScript(save_directory); } } // Distribution is new distribution distribution = distribution_new; } // Save learning curve to file, if necessary if (!save_directory.empty()) { // Todo: save cost labels also saveMatrix(save_directory, "exploration_curve.txt",exploration_curve,overwrite); saveMatrix(save_directory, "learning_curve.txt",learning_curve,overwrite); } }