Esempio n. 1
0
    KUKADU_SHARED_PTR<Trajectory> PoWER::updateStep() {

        KUKADU_SHARED_PTR<Trajectory> lastUp = getLastUpdate();

        vector<KUKADU_SHARED_PTR<Trajectory> > lastDmps = getLastRolloutParameters();
        vector<double> lastRewards = getLastRolloutCost();

        // add rollouts to history
        for(int i = 0; i < lastRewards.size(); ++i) {
            pair <double, KUKADU_SHARED_PTR<Trajectory> > p(lastRewards.at(i), lastDmps.at(i));
            sampleHistory.push_back(p);
        }

        // sort by reward...
        sort(sampleHistory.begin(), sampleHistory.end(), rewardComparator);

        // ...and discard bad samples
        int histSize = sampleHistory.size();
        for(int i = (importanceSamplingCount - 1); i < histSize; ++i)
            sampleHistory.pop_back();

        double totalReward = 0.0;
        for(int i = 0; i < sampleHistory.size(); ++i)
            totalReward = totalReward + sampleHistory.at(i).first;

        vector<vec> lastUpCoeffs = lastUp->getCoefficients();
        vec newCoeffsJ(lastUpCoeffs.at(0).n_elem);
        vector<vec> newCoeffs;

        for(int i = 0; i < lastUp->getCoefficients().size(); ++i) {
            vec v = lastUp->getCoefficients().at(i);
            newCoeffs.push_back(v);
        }
        cout << "====================" << endl;

        // for each degree of freedom
        for(int i = 0; i < newCoeffs.size(); ++i) {

            vec currentDegCoeffs = newCoeffs.at(i);

            // go through all samples and weight rollouts
            for(int j = 0; j < sampleHistory.size(); ++j) {
                currentDegCoeffs += sampleHistory.at(j).first / totalReward * (sampleHistory.at(j).second->getCoefficients().at(i) - lastUpCoeffs.at(i));
            }

            newCoeffs[i] = currentDegCoeffs;

        }

        KUKADU_SHARED_PTR<Trajectory> newUp = lastUp->copy();
        newUp->setCoefficients(newCoeffs);

        return newUp;

    }
Esempio n. 2
0
    std::vector<KUKADU_SHARED_PTR<Trajectory> > PoWER::computeRolloutParamters() {

        KUKADU_SHARED_PTR<Trajectory> lastUp = getLastUpdate();
        vector<vec> dmpCoeffs = lastUp->getCoefficients();
        vector<KUKADU_SHARED_PTR<Trajectory> > nextCoeffs;

        for(int k = 0; k < updatesPerRollout; ++k) {

            vec currCoeff;
            for(int i = 0; i < dmpCoeffs.size(); ++i) {

                currCoeff = dmpCoeffs.at(i);

                for(int j = 0; j < currCoeff.n_elem; ++j) {

                    kukadu_normal_distribution normal = normals.at(j);
                    double eps = normal(generator);

                    currCoeff(j) += eps;

                }

                dmpCoeffs[i] = currCoeff;

            }

            KUKADU_SHARED_PTR<Trajectory> nextUp = lastUp->copy();
            nextUp->setCoefficients(dmpCoeffs);

            nextCoeffs.push_back(nextUp);

        }

        return nextCoeffs;

    }