KUKADU_SHARED_PTR<Trajectory> PoWER::updateStep() { KUKADU_SHARED_PTR<Trajectory> lastUp = getLastUpdate(); vector<KUKADU_SHARED_PTR<Trajectory> > lastDmps = getLastRolloutParameters(); vector<double> lastRewards = getLastRolloutCost(); // add rollouts to history for(int i = 0; i < lastRewards.size(); ++i) { pair <double, KUKADU_SHARED_PTR<Trajectory> > p(lastRewards.at(i), lastDmps.at(i)); sampleHistory.push_back(p); } // sort by reward... sort(sampleHistory.begin(), sampleHistory.end(), rewardComparator); // ...and discard bad samples int histSize = sampleHistory.size(); for(int i = (importanceSamplingCount - 1); i < histSize; ++i) sampleHistory.pop_back(); double totalReward = 0.0; for(int i = 0; i < sampleHistory.size(); ++i) totalReward = totalReward + sampleHistory.at(i).first; vector<vec> lastUpCoeffs = lastUp->getCoefficients(); vec newCoeffsJ(lastUpCoeffs.at(0).n_elem); vector<vec> newCoeffs; for(int i = 0; i < lastUp->getCoefficients().size(); ++i) { vec v = lastUp->getCoefficients().at(i); newCoeffs.push_back(v); } cout << "====================" << endl; // for each degree of freedom for(int i = 0; i < newCoeffs.size(); ++i) { vec currentDegCoeffs = newCoeffs.at(i); // go through all samples and weight rollouts for(int j = 0; j < sampleHistory.size(); ++j) { currentDegCoeffs += sampleHistory.at(j).first / totalReward * (sampleHistory.at(j).second->getCoefficients().at(i) - lastUpCoeffs.at(i)); } newCoeffs[i] = currentDegCoeffs; } KUKADU_SHARED_PTR<Trajectory> newUp = lastUp->copy(); newUp->setCoefficients(newCoeffs); return newUp; }
std::vector<KUKADU_SHARED_PTR<Trajectory> > PoWER::computeRolloutParamters() { KUKADU_SHARED_PTR<Trajectory> lastUp = getLastUpdate(); vector<vec> dmpCoeffs = lastUp->getCoefficients(); vector<KUKADU_SHARED_PTR<Trajectory> > nextCoeffs; for(int k = 0; k < updatesPerRollout; ++k) { vec currCoeff; for(int i = 0; i < dmpCoeffs.size(); ++i) { currCoeff = dmpCoeffs.at(i); for(int j = 0; j < currCoeff.n_elem; ++j) { kukadu_normal_distribution normal = normals.at(j); double eps = normal(generator); currCoeff(j) += eps; } dmpCoeffs[i] = currCoeff; } KUKADU_SHARED_PTR<Trajectory> nextUp = lastUp->copy(); nextUp->setCoefficients(dmpCoeffs); nextCoeffs.push_back(nextUp); } return nextCoeffs; }