/** \todo Get rid of runOptimizationParallelDeprecated(), and implement in UpdaterCovarAdapation */ void runOptimizationParallelDeprecated( Task* task, TaskSolver* task_solver, vector<DistributionGaussian*> initial_distributions, Updater* updater, int n_updates, int n_samples_per_update, string save_directory, bool overwrite, bool only_learning_curve) { // Some variables int n_parallel = initial_distributions.size(); assert(n_parallel>=2); int n_samples = n_samples_per_update; // Shorthand VectorXi offsets(n_parallel+1); offsets[0] = 0; for (int ii=0; ii<n_parallel; ii++) offsets[ii+1] = offsets[ii] + initial_distributions[ii]->mean().size(); int sum_n_dims = offsets[n_parallel]; // n_parallel X n_samples X n_dims // Note: n_samples must be the same for all, n_dims varies //vector<MatrixXd> sample(n_parallel); //for (int ii=0; ii<n_parallel; ii++) // // Pre-allocate memory just to be clear. // sample[ii] = MatrixXd(n_samples_per_update,initial_distributions[ii]->mean().size()); MatrixXd samples(n_samples,sum_n_dims); // Some variables VectorXd sample_eval(sum_n_dims); VectorXd cost_eval; MatrixXd cost_vars_eval; MatrixXd samples_per_parallel; MatrixXd cost_vars; VectorXd cur_costs; VectorXd costs(n_samples); VectorXd total_costs(n_samples); VectorXd weights; // Bookkeeping MatrixXd learning_curve(n_updates,3); vector<DistributionGaussian> distributions; vector<DistributionGaussian> distributions_new; for (int ii=0; ii<n_parallel; ii++) { distributions.push_back(*(initial_distributions[ii]->clone())); distributions_new.push_back(*(initial_distributions[ii]->clone())); } // Optimization loop for (int i_update=0; i_update<n_updates; i_update++) { // 0. Get cost of current distribution mean for (int pp=0; pp<n_parallel; pp++) sample_eval.segment(offsets[pp],offsets[pp+1]-offsets[pp]) = distributions[pp].mean().transpose(); task_solver->performRollout(sample_eval,cost_vars_eval); task->evaluateRollout(cost_vars_eval,sample_eval,cost_eval); Rollout* rollout_eval = new Rollout(sample_eval,cost_vars_eval,cost_eval); // 1. Sample from distribution for (int pp=0; pp<n_parallel; pp++) { distributions[pp].generateSamples(n_samples, samples_per_parallel); int width = offsets[pp+1]-offsets[pp]; samples.block(0,offsets[pp],n_samples,width) = samples_per_parallel; } vector<Rollout*> rollouts(n_samples_per_update); for (int i_sample=0; i_sample<n_samples_per_update; i_sample++) { // 2. Perform rollouts for the samples task_solver->performRollout(samples.row(i_sample), cost_vars); // 3. Evaluate the last batch of rollouts task->evaluateRollout(cost_vars,samples.row(i_sample),cur_costs); // Bookkeeping costs[i_sample] = cur_costs[0]; rollouts[i_sample] = new Rollout(samples.row(i_sample),cost_vars,cur_costs); } // 4. Update parameters for (int pp=0; pp<n_parallel; pp++) { int width = offsets[pp+1]-offsets[pp]; samples_per_parallel = samples.block(0,offsets[pp],n_samples,width); updater->updateDistribution(distributions[pp], samples_per_parallel, costs, weights, distributions_new[pp]); } // Some output and/or saving to file (if "directory" is set) if (save_directory.empty()) { cout << i_update+1 << " cost_eval=" << cost_eval << endl; } else { // Update learning curve // How many samples so far? learning_curve(i_update,0) = i_update*n_samples_per_update; // Cost of evaluation learning_curve(i_update,1) = cost_eval[0]; // Exploration magnitude learning_curve(i_update,2) = 0.0; for (int pp=0; pp<n_parallel; pp++) learning_curve(i_update,2) += sqrt(distributions[pp].maxEigenValue()); // Save more than just learning curve. if (!only_learning_curve) { saveToDirectory(save_directory,i_update,distributions,rollout_eval,rollouts,weights,distributions_new); if (i_update==0) task->savePlotRolloutScript(save_directory); } } // Distribution is new distribution for (int ii=0; ii<n_parallel; ii++) distributions[ii] = distributions_new[ii]; } }
// ------------------------------------------------------------------------- void MultiMDDAGLearner::parallelRollout(const nor_utils::Args& args, InputData* pData, const string fname, int rsize, GenericClassificationBasedPolicy* policy, PolicyResult* result, const int weakLearnerPostion) { vector<AlphaReal> policyError(_shypIter); vector<InputData*> rollouts(_shypIter,NULL); // generate rollout if (_randomNPercent>0) { vector<int> randomIndices(_shypIter); for( int si = 0; si < _shypIter; ++si ) randomIndices[si]=si; random_shuffle(randomIndices.begin(), randomIndices.end()); int ig = static_cast<int>(static_cast<float>(_shypIter * _randomNPercent) / 100.0); for( int si = 0; si < ig; ++si ) { stringstream ss(fname); // if (si>0) // { // ss << fname << "_" << si; // } else { // ss << fname; // } MDDAGLearner::parallelRollout(args, pData, ss.str(), rsize, policy, result, randomIndices[si]); InputData* rolloutTrainingData = getRolloutData( args, ss.str() ); if (_verbose) cout << "---> Rollout size("<< randomIndices[si] << ")" << rolloutTrainingData->getNumExamples() << endl; rollouts[randomIndices[si]] = rolloutTrainingData; } } else { for( int si = 0; si < _shypIter; ++si ) { stringstream ss(fname); // if (si>0) // { // ss << fname << "_" << si; // } else { // ss << fname; // } MDDAGLearner::parallelRollout(args, pData, ss.str(), rsize, policy, result, si); InputData* rolloutTrainingData = getRolloutData( args, ss.str() ); if (_verbose) cout << "---> Rollout size("<< si << ")" << rolloutTrainingData->getNumExamples() << endl; rollouts[si] = rolloutTrainingData; } } // update policy int numOfUpdatedPolicy = 0; for( int si = 0; si < _shypIter; ++si ) { if ((rollouts[si]==NULL) || (rollouts[si]->getNumExamples()<=2)) continue; policyError[si] = _policy->trainpolicy( rollouts[si], _baseLearnerName, _trainingIter, si ); if (_verbose) cout << "--> Policy error: pos: " << si << "\t error:\t" << setprecision (4) << policyError[si] << endl; numOfUpdatedPolicy++; } if (_verbose) cout << "--> Number of updated policy" << numOfUpdatedPolicy << endl << flush; //release rolouts for( int si = 0; si < _shypIter; ++si ) { if (rollouts[si]) delete rollouts[si]; } }
void runOptimizationTask( const Task* const task, const TaskSolver* const task_solver, const DistributionGaussian* const initial_distribution, const Updater* const updater, int n_updates, int n_samples_per_update, std::string save_directory, bool overwrite, bool only_learning_curve) { int n_cost_components = task->getNumberOfCostComponents(); // Some variables VectorXd sample_eval; MatrixXd cost_vars_eval; VectorXd cost_eval(1+n_cost_components); MatrixXd samples; MatrixXd cost_vars; VectorXd weights; MatrixXd costs(n_samples_per_update,1+n_cost_components); // tmp variables VectorXd total_costs(n_samples_per_update); VectorXd cur_cost(1+n_cost_components); // Bookkeeping MatrixXd learning_curve(n_updates,2+n_cost_components); MatrixXd exploration_curve(n_updates,2); if (save_directory.empty()) cout << "init = " << " distribution=" << *initial_distribution; DistributionGaussian distribution = *(initial_distribution->clone()); DistributionGaussian distribution_new = *(initial_distribution->clone()); // Optimization loop for (int i_update=0; i_update<n_updates; i_update++) { // 0. Get cost of current distribution mean sample_eval = distribution.mean().transpose(); task_solver->performRollout(sample_eval,cost_vars_eval); task->evaluateRollout(cost_vars_eval,sample_eval,cost_eval); Rollout* rollout_eval = new Rollout(sample_eval,cost_vars_eval,cost_eval); // 1. Sample from distribution distribution.generateSamples(n_samples_per_update, samples); vector<Rollout*> rollouts(n_samples_per_update); for (int i_sample=0; i_sample<n_samples_per_update; i_sample++) { // 2A. Perform the rollout task_solver->performRollout(samples.row(i_sample),cost_vars); // 2B. Evaluate the rollout task->evaluateRollout(cost_vars,samples.row(i_sample),cur_cost); costs.row(i_sample) = cur_cost; rollouts[i_sample] = new Rollout(samples.row(i_sample),cost_vars,cur_cost); } // 3. Update parameters (first column of costs contains sum of cost components) total_costs = costs.col(0); updater->updateDistribution(distribution, samples, total_costs, weights, distribution_new); // Bookkeeping // Some output and/or saving to file (if "directory" is set) if (save_directory.empty()) { cout << "\t cost_eval=" << cost_eval << endl << i_update+1 << " " << distribution; } else { // Update learning curve // How many samples? int i_samples = i_update*n_samples_per_update; learning_curve(i_update,0) = i_samples; // Cost of evaluation learning_curve.block(i_update,1,1,1+n_cost_components) = cost_eval.transpose(); // Exploration magnitude exploration_curve(i_update,0) = i_samples; exploration_curve(i_update,1) = sqrt(distribution.maxEigenValue()); // Save more than just learning curve. if (!only_learning_curve) { saveToDirectory(save_directory,i_update,distribution,rollout_eval,rollouts,weights,distribution_new); if (i_update==0) task->savePlotRolloutScript(save_directory); } } // Distribution is new distribution distribution = distribution_new; } // Save learning curve to file, if necessary if (!save_directory.empty()) { // Todo: save cost labels also saveMatrix(save_directory, "exploration_curve.txt",exploration_curve,overwrite); saveMatrix(save_directory, "learning_curve.txt",learning_curve,overwrite); } }