/** * Store the given experience. * * @param state Given state. * @param action Given action. * @param reward Given reward. * @param nextState Given next state. * @param isEnd Whether next state is terminal state. */ void Store(const StateType& state, ActionType action, double reward, const StateType& nextState, bool isEnd) { states.col(position) = state.Encode(); actions(position) = action; rewards(position) = reward; nextStates.col(position) = nextState.Encode(); isTerminal(position) = isEnd; position++; if (position == capacity) { full = true; position = 0; } }
int main(int argc, char **argv) { OptionParser opts; string mapFile, evidFile; int factor; opts.addOption(new StringOption("map", "--map <filename> : map file", "../input/grid.bmp", mapFile, false)); opts.addOption(new StringOption("evidence", "--evidence <filename> : evidence file", "", evidFile, true)); opts.addOption(new IntOption("factor", "--factor <int> : scaling factor", 1, factor, true)); opts.parse(argc,argv); JetColorMap jet; RGBTRIPLE black = {0,0,0}; RGBTRIPLE white = {255,255,255}; RGBTRIPLE red; red.R = 255; red.G = 0; red.B = 0; RGBTRIPLE blue; blue.R = 0; blue.G = 0; blue.B = 255; RGBTRIPLE green; green.R = 0; green.G = 255; green.B = 0; RGBTRIPLE initialColor; initialColor.R = 111; initialColor.G = 49; initialColor.B = 152; // initialColor.G = 152; // initialColor.B = 49; RGBTRIPLE currentColor; currentColor.R = 181; currentColor.G = 165; currentColor.B = 213; // currentColor.G = 213; // currentColor.B = 165; RGBTRIPLE magenta; magenta.R = 255; magenta.G = 0; magenta.B = 255; RGBTRIPLE cyan; cyan.R = 0; cyan.G = 255; cyan.B = 255; RGBTRIPLE yellow; yellow.R = 255; yellow.G = 255; yellow.B = 0; BMPFile bmpFile(mapFile); Grid grid(bmpFile, black); Evidence testSet(evidFile, grid, factor); /* if (1) { evid.split(trainSet, testSet, 0.8); }else{ evid.deterministicsplit(trainSet, testSet); }*/ #if 0 cout << "Creating Markov Model"<<endl; MarkovModel markmodel(grid, trainSet); double totalObj = 0.0; for (int i=0; i < testSet.size(); i++) { vector<pair<int, int> > path = testSet.at(i); cout << "Calling eval"<<endl; double obj = markmodel.eval(path); cout << "OBJ: "<<i<<" "<<obj<<endl; totalObj += obj; } cout << "TOTAL OBJ: "<<totalObj<<endl; cout << "AVERAGE OBJ: "<<totalObj/testSet.size()<<endl; return 0; #endif vector<PosFeature> features; cout << "Constant Feature"<<endl; ConstantFeature constFeat(grid); features.push_back(constFeat); cout << "Obstacle Feature"<<endl; ObstacleFeature obsFeat(grid); features.push_back(obsFeat); for (int i=1; i < 5; i++) 
{ cout << "Blur Feature "<<i<<endl; ObstacleBlurFeature blurFeat(grid, 5*i); features.push_back(blurFeat); } cout << "Creating feature array"<<endl; FeatureArray featArray2(features); cout << "Creating lower resolution feature array"<<endl; FeatureArray featArray(featArray2, factor); pair<int, int> dims = grid.dims(); pair<int, int> lowDims((int)ceil((float)dims.first/factor), (int)ceil((float)dims.second/factor)); vector<double> weights(features.size(), -0.0); weights.at(1) = -6.2; //for (int i=2; i < weights.size(); i++) // weights.at(i) = -1.0; weights.at(0) = -2.23;//-2.23 weights.at(2) = -0.35; weights.at(3) = -2.73; weights.at(4) = -0.92; weights.at(5) = -0.26; Parameters params(weights); OrderedWaveInferenceEngine engine(InferenceEngine::GRID8); vector<vector<double> > prior(dims.first,vector<double> (dims.second,0.0)); /* double divide = 1.0; vector<double> radiusWeight; for (int i=0; i < 20; i++) { radiusWeight.push_back(1.0/divide); divide*=2; } generatePrior(grid, trainSet, priorOrig, radiusWeight, factor); reducePrior(priorOrig, prior, factor); */ vector<vector<vector<double> > > partition, backpartition; int time0 = time(0); BMPFile gridView(dims.first, dims.second); RewardMap rewards(featArray, params); vector<double> sums(params.size(),0.00001); vector<vector<double> > occupancy; Predictor predictor(grid, rewards, engine); predictor.setPrior(prior); cout << testSet.size() <<" Examples"<<endl; for (int i=0; i < testSet.size(); i++) { int index = 0; vector<pair<int, int> > traj = testSet.at(i); vector<double> times = testSet.getTimes(i); pair<int, int> initial = traj.front(); pair<int,int> & botinGrid = testSet.at_bot(i); pair<double,double>& botinPoint = testSet.at_rbot(i); pair<double,double>& end = testSet.at_raw(i).back(); predictor.setStart(initial); double thresh = -20.0; double startTime = times.front(); char buf[1024]; sprintf(buf, "../output/pppredict%03d.dat", i); ofstream file(buf); for (double tick = startTime; index < traj.size(); 
tick+=0.4) { for ( ; index < traj.size() && times.at(index) < tick; index++); if (index == traj.size() ) break; cout << "Evidence: "<<i<<" timestep: "<<tick <<" index: "<<index<<endl; predictor.predict(traj.at(index), occupancy); cout << "SIZE: "<<prior.size()<<endl; vector<vector<double> > pos = predictor.getPosterior(); gridView.addBelief(pos, -30.0, 0.0,jet); grid.addObstacles(gridView, black); gridView.addLabel(botinGrid,green); vector<pair<int, int> > subTraj; subTraj.insert(subTraj.end(), traj.begin(), traj.begin()+index); gridView.addVector(subTraj, red, factor); sprintf(buf, "../compare/pp%03d-%03f.bmp", i, tick-startTime); gridView.write(buf); //pair<double,double> values = predictor.check(traj.back()); double cost = 0.0; for(int itr = 0;itr<index;itr++) cost +=rewards.at(traj[itr].first,traj[itr].second); cout<<i<<" Normalizer: "<<predictor.getNormalizer(traj.back())<< " path cost: "<<cost<<" Probability: "<<cost+predictor.getNormalizer(traj.back())<<endl; vector<vector<vector<double> > > timeOcc = predictor.getTimeOccupancy(); vector<vector<double > > posterior = predictor.getPosterior(); double maxV = -HUGE_VAL; pair<int,int> predestGrid; pair<double,double> predestPoint; for (int ii=0; ii< dims.first; ii++) { for (int jj=0; jj < dims.second; jj++) { if(posterior[ii][jj]>maxV){ predestGrid.first = ii; predestGrid.second = jj; } maxV = max(maxV, posterior.at(ii).at(jj)); } } predestPoint = grid.grid2Real(predestGrid.first,predestGrid.second); double dist = sqrt((end.first-predestPoint.first)*(end.first-predestPoint.first) +(end.second-predestPoint.second)*(end.second-predestPoint.second)); double logloss = entropy(posterior); cout<<"final belief: "<<posterior.at(traj.back().first).at(traj.back().second) <<" max: "<<maxV <<" logloss: "<<logloss<<endl; cout<<botinGrid.first<<" "<<botinGrid.second <<" "<<predestGrid.first<<" "<<predestGrid.second<<endl; file<<tick-startTime <<" "<<logloss <<" "<<posterior.at(botinGrid.first).at(botinGrid.second) <<" 
"<<posterior.at(traj.back().first).at(traj.back().second) <<" "<<maxV<<" "<<dist<<endl; } file.close(); } }
int main (int argc, char *argv[]) { /************************* * Initialisation de MPI * *************************/ boost::mpi::environment env(argc, argv, MPI_THREAD_MULTIPLE, true); boost::mpi::communicator world; /**************************** * Il faut au moins 4 nœuds * ****************************/ const size_t ALL = world.size(); const size_t RANK = world.rank(); /************************ * Initialisation de EO * ************************/ eoParser parser(argc, argv); eoState state; // keeps all things allocated dim::core::State state_dim; // keeps all things allocated /***************************** * Definition des paramètres * *****************************/ bool sync = parser.createParam(bool(true), "sync", "sync", 0, "Islands Model").value(); bool smp = parser.createParam(bool(true), "smp", "smp", 0, "Islands Model").value(); unsigned nislands = parser.createParam(unsigned(4), "nislands", "Number of islands (see --smp)", 0, "Islands Model").value(); // a double alphaP = parser.createParam(double(0.2), "alpha", "Alpha Probability", 'a', "Islands Model").value(); double alphaF = parser.createParam(double(0.01), "alphaF", "Alpha Fitness", 'A', "Islands Model").value(); // b double betaP = parser.createParam(double(0.01), "beta", "Beta Probability", 'b', "Islands Model").value(); // d double probaSame = parser.createParam(double(100./(smp ? 
nislands : ALL)), "probaSame", "Probability for an individual to stay in the same island", 'd', "Islands Model").value(); // I bool initG = parser.createParam(bool(true), "initG", "initG", 'I', "Islands Model").value(); bool update = parser.createParam(bool(true), "update", "update", 'U', "Islands Model").value(); bool feedback = parser.createParam(bool(true), "feedback", "feedback", 'F', "Islands Model").value(); bool migrate = parser.createParam(bool(true), "migrate", "migrate", 'M', "Islands Model").value(); unsigned nmigrations = parser.createParam(unsigned(1), "nmigrations", "Number of migrations to do at each generation (0=all individuals are migrated)", 0, "Islands Model").value(); unsigned stepTimer = parser.createParam(unsigned(1000), "stepTimer", "stepTimer", 0, "Islands Model").value(); bool deltaUpdate = parser.createParam(bool(true), "deltaUpdate", "deltaUpdate", 0, "Islands Model").value(); bool deltaFeedback = parser.createParam(bool(true), "deltaFeedback", "deltaFeedback", 0, "Islands Model").value(); double sensitivity = 1 / parser.createParam(double(1.), "sensitivity", "sensitivity of delta{t} (1/sensitivity)", 0, "Islands Model").value(); std::string rewardStrategy = parser.createParam(std::string("avg"), "rewardStrategy", "Strategy of rewarding: best or avg", 0, "Islands Model").value(); std::vector<double> rewards(smp ? nislands : ALL, 1.); std::vector<double> timeouts(smp ? nislands : ALL, 1.); for (size_t i = 0; i < (smp ? 
nislands : ALL); ++i) { std::ostringstream ss; ss << "reward" << i; rewards[i] = parser.createParam(double(1.), ss.str(), ss.str(), 0, "Islands Model").value(); ss.str(""); ss << "timeout" << i; timeouts[i] = parser.createParam(double(1.), ss.str(), ss.str(), 0, "Islands Model").value(); } /********************************* * Déclaration des composants EO * *********************************/ unsigned chromSize = parser.getORcreateParam(unsigned(0), "chromSize", "The length of the bitstrings", 'n',"Problem").value(); eoInit<EOT>& init = dim::do_make::genotype(parser, state, EOT(), 0); eoEvalFunc<EOT>* ptEval = NULL; ptEval = new SimulatedEval( rewards[RANK] ); state.storeFunctor(ptEval); eoEvalFuncCounter<EOT> eval(*ptEval); unsigned popSize = parser.getORcreateParam(unsigned(100), "popSize", "Population Size", 'P', "Evolution Engine").value(); dim::core::Pop<EOT>& pop = dim::do_make::detail::pop(parser, state, init); double targetFitness = parser.getORcreateParam(double(1000), "targetFitness", "Stop when fitness reaches",'T', "Stopping criterion").value(); unsigned maxGen = parser.getORcreateParam(unsigned(0), "maxGen", "Maximum number of generations () = none)",'G',"Stopping criterion").value(); dim::continuator::Base<EOT>& continuator = dim::do_make::continuator<EOT>(parser, state, eval); dim::core::IslandData<EOT> data(smp ? 
nislands : -1); std::string monitorPrefix = parser.getORcreateParam(std::string("result"), "monitorPrefix", "Monitor prefix filenames", '\0', "Output").value(); dim::utils::CheckPoint<EOT>& checkpoint = dim::do_make::checkpoint<EOT>(parser, state, continuator, data, 1, stepTimer); /************** * EO routine * **************/ make_parallel(parser); make_verbose(parser); make_help(parser); if (!smp) // no smp enabled use mpi instead { /**************************************** * Distribution des opérateurs aux iles * ****************************************/ eoMonOp<EOT>* ptMon = NULL; if (sync) { ptMon = new DummyOp; } else { ptMon = new SimulatedOp( timeouts[RANK] ); } state.storeFunctor(ptMon); /********************************** * Déclaration des composants DIM * **********************************/ dim::core::ThreadsRunner< EOT > tr; dim::evolver::Easy<EOT> evolver( /*eval*/*ptEval, *ptMon, false ); dim::feedbacker::Base<EOT>* ptFeedbacker = NULL; if (feedback) { if (sync) { ptFeedbacker = new dim::feedbacker::sync::Easy<EOT>(alphaF); } else { ptFeedbacker = new dim::feedbacker::async::Easy<EOT>(alphaF, sensitivity, deltaFeedback); } } else { ptFeedbacker = new dim::algo::Easy<EOT>::DummyFeedbacker(); } state_dim.storeFunctor(ptFeedbacker); dim::vectorupdater::Base<EOT>* ptUpdater = NULL; if (update) { dim::vectorupdater::Reward<EOT>* ptReward = NULL; if (rewardStrategy == "best") { ptReward = new dim::vectorupdater::Best<EOT>(alphaP, betaP); } else { ptReward = new dim::vectorupdater::Average<EOT>(alphaP, betaP, sensitivity, sync ? 
false : deltaUpdate); } state_dim.storeFunctor(ptReward); ptUpdater = new dim::vectorupdater::Easy<EOT>(*ptReward); } else { ptUpdater = new dim::algo::Easy<EOT>::DummyVectorUpdater(); } state_dim.storeFunctor(ptUpdater); dim::memorizer::Easy<EOT> memorizer; dim::migrator::Base<EOT>* ptMigrator = NULL; if (migrate) { if (sync) { ptMigrator = new dim::migrator::sync::Easy<EOT>(); } else { ptMigrator = new dim::migrator::async::Easy<EOT>(nmigrations); } } else { ptMigrator = new dim::algo::Easy<EOT>::DummyMigrator(); } state_dim.storeFunctor(ptMigrator); dim::algo::Easy<EOT> island( evolver, *ptFeedbacker, *ptUpdater, memorizer, *ptMigrator, checkpoint, monitorPrefix ); if (!sync) { tr.addHandler(*ptFeedbacker).addHandler(*ptMigrator).add(island); } /*************** * Rock & Roll * ***************/ /****************************************************************************** * Création de la matrice de transition et distribution aux iles des vecteurs * ******************************************************************************/ dim::core::MigrationMatrix probabilities( ALL ); dim::core::InitMatrix initmatrix( initG, probaSame ); if ( 0 == RANK ) { initmatrix( probabilities ); std::cout << probabilities; data.proba = probabilities(RANK); for (size_t i = 1; i < ALL; ++i) { world.send( i, 100, probabilities(i) ); } std::cout << "Island Model Parameters:" << std::endl << "alphaP: " << alphaP << std::endl << "alphaF: " << alphaF << std::endl << "betaP: " << betaP << std::endl << "probaSame: " << probaSame << std::endl << "initG: " << initG << std::endl << "update: " << update << std::endl << "feedback: " << feedback << std::endl << "migrate: " << migrate << std::endl << "sync: " << sync << std::endl << "stepTimer: " << stepTimer << std::endl << "deltaUpdate: " << deltaUpdate << std::endl << "deltaFeedback: " << deltaFeedback << std::endl << "sensitivity: " << sensitivity << std::endl << "chromSize: " << chromSize << std::endl << "popSize: " << popSize << std::endl << 
"targetFitness: " << targetFitness << std::endl << "maxGen: " << maxGen << std::endl ; } else { world.recv( 0, 100, data.proba ); } /****************************************** * Get the population size of all islands * ******************************************/ world.barrier(); dim::utils::print_sum(pop); FitnessInit fitInit; apply<EOT>(fitInit, pop); if (sync) { island( pop, data ); } else { tr( pop, data ); } world.abort(0); return 0 ; } // smp /********************************** * Déclaration des composants DIM * **********************************/ dim::core::ThreadsRunner< EOT > tr; std::vector< dim::core::Pop<EOT> > islandPop(nislands); std::vector< dim::core::IslandData<EOT> > islandData(nislands); dim::core::MigrationMatrix probabilities( nislands ); dim::core::InitMatrix initmatrix( initG, probaSame ); initmatrix( probabilities ); std::cout << probabilities; FitnessInit fitInit; for (size_t i = 0; i < nislands; ++i) { std::cout << "island " << i << std::endl; islandPop[i].append(popSize, init); apply<EOT>(fitInit, islandPop[i]); islandData[i] = dim::core::IslandData<EOT>(nislands, i); std::cout << islandData[i].size() << " " << islandData[i].rank() << std::endl; islandData[i].proba = probabilities(i); apply<EOT>(eval, islandPop[i]); /**************************************** * Distribution des opérateurs aux iles * ****************************************/ eoMonOp<EOT>* ptMon = NULL; ptMon = new SimulatedOp( timeouts[islandData[i].rank()] ); state.storeFunctor(ptMon); eoEvalFunc<EOT>* __ptEval = NULL; __ptEval = new SimulatedEval( rewards[islandData[i].rank()] ); state.storeFunctor(__ptEval); dim::evolver::Base<EOT>* ptEvolver = new dim::evolver::Easy<EOT>( /*eval*/*__ptEval, *ptMon, false ); state_dim.storeFunctor(ptEvolver); dim::feedbacker::Base<EOT>* ptFeedbacker = new dim::feedbacker::smp::Easy<EOT>(islandPop, islandData, alphaF); state_dim.storeFunctor(ptFeedbacker); dim::vectorupdater::Reward<EOT>* ptReward = NULL; if (rewardStrategy == "best") { 
ptReward = new dim::vectorupdater::Best<EOT>(alphaP, betaP); } else { ptReward = new dim::vectorupdater::Average<EOT>(alphaP, betaP, sensitivity, sync ? false : deltaUpdate); } state_dim.storeFunctor(ptReward); dim::vectorupdater::Base<EOT>* ptUpdater = new dim::vectorupdater::Easy<EOT>(*ptReward); state_dim.storeFunctor(ptUpdater); dim::memorizer::Base<EOT>* ptMemorizer = new dim::memorizer::Easy<EOT>(); state_dim.storeFunctor(ptMemorizer); dim::migrator::Base<EOT>* ptMigrator = new dim::migrator::smp::Easy<EOT>(islandPop, islandData, monitorPrefix); state_dim.storeFunctor(ptMigrator); dim::utils::CheckPoint<EOT>& checkpoint = dim::do_make::checkpoint<EOT>(parser, state, continuator, islandData[i], 1, stepTimer); dim::algo::Base<EOT>* ptIsland = new dim::algo::smp::Easy<EOT>( *ptEvolver, *ptFeedbacker, *ptUpdater, *ptMemorizer, *ptMigrator, checkpoint, islandPop, islandData, monitorPrefix ); state_dim.storeFunctor(ptIsland); ptEvolver->size(nislands); ptFeedbacker->size(nislands); ptReward->size(nislands); ptUpdater->size(nislands); ptMemorizer->size(nislands); ptMigrator->size(nislands); ptIsland->size(nislands); ptEvolver->rank(i); ptFeedbacker->rank(i); ptReward->rank(i); ptUpdater->rank(i); ptMemorizer->rank(i); ptMigrator->rank(i); ptIsland->rank(i); tr.add(*ptIsland); } tr(pop, data); return 0 ; }
pair<double,double> CoCheckersExperiment::playGame( shared_ptr<NEAT::GeneticIndividual> ind1, shared_ptr<NEAT::GeneticIndividual> ind2 ) { //You get 1 point just for entering the game, wahooo! pair<double,double> rewards(1.0,1.0); #if DEBUG_GAME_ANNOUNCER cout << "Playing game\n"; #endif populateSubstrate(ind1,0); populateSubstrate(ind2,1); uchar b[8][8]; //cout << "Playing games with HyperNEAT as black\n"; //for (handCodedType=0;handCodedType<5;handCodedType++) for (testCases=0;testCases<2;testCases++) { if (testCases==0) { individualBlack = ind1; individualWhite = ind2; } else //testCases==1 { individualBlack = ind2; individualWhite = ind1; } resetBoard(b); int retval=-1; int rounds=0; for (rounds=0;rounds<CHECKERS_MAX_ROUNDS&&retval==-1;rounds++) { //cout << "Round: " << rounds << endl; moveToMake = CheckersMove(); if (testCases==0) { currentSubstrateIndex=0; } else //testCases==1 { currentSubstrateIndex=1; } //cout << "Black is thinking...\n"; evaluatemax(b,CheckersNEATDatatype(INT_MAX/2),0,2); #if CHECKERS_EXPERIMENT_DEBUG cout << "BLACK MAKING MOVE\n"; printBoard(b); #endif if (moveToMake.from.x==255) { //black loses cout << "BLACK LOSES!\n"; retval = WHITE; } else { makeMove(moveToMake,b); retval = getWinner(b,WHITE); } #if CHECKERS_EXPERIMENT_LOG_EVALUATIONS memcpy(gameLog[rounds*2],b,sizeof(uchar)*8*8); #endif #if COCHECKERS_EXPERIMENT_DEBUG printBoard(b); CREATE_PAUSE(""); #endif if (retval==-1) { //printBoard(b); moveToMake = CheckersMove(); { //progress_timer t; if (testCases==0) { currentSubstrateIndex=1; } else //testCases==1 { currentSubstrateIndex=0; } //cout << "White is thinking...\n"; evaluatemin(b,CheckersNEATDatatype(INT_MAX/2),0,3); //cout << "SimpleCheckers time: "; } #if COCHECKERS_EXPERIMENT_DEBUG cout << "WHITE MAKING MOVE\n"; printBoard(b); #endif if (moveToMake.from.x==255) { //white loses cout << "WHITE LOSES BECAUSE THERE'S NO MOVES LEFT!\n"; retval = BLACK; #if COCHECKERS_EXPERIMENT_DEBUG printBoard(b); CREATE_PAUSE(""); #endif } else 
{ makeMove(moveToMake,b); retval = getWinner(b,BLACK); } #if COCHECKERS_EXPERIMENT_DEBUG printBoard(b); CREATE_PAUSE(""); #endif } #if CHECKERS_EXPERIMENT_LOG_EVALUATIONS memcpy(gameLog[rounds*2+1],b,sizeof(uchar)*8*8); #endif } if (retval==BLACK) { #if DEBUG_GAME_ANNOUNCER cout << "BLACK WON!\n"; #endif if (ind1==individualBlack) { rewards.first += 800; rewards.first += (CHECKERS_MAX_ROUNDS-rounds); } else { rewards.second += 800; rewards.second += (CHECKERS_MAX_ROUNDS-rounds); } } else if (retval==-1) //draw { #if DEBUG_GAME_ANNOUNCER cout << "WE TIED!\n"; #endif //rewards.first += 200; //rewards.second += 200; } else //White wins { #if DEBUG_GAME_ANNOUNCER cout << "WHITE WON\n"; #endif if (ind1==individualWhite) { rewards.first += 800; rewards.first += (CHECKERS_MAX_ROUNDS-rounds); } else { rewards.second += 800; rewards.second += (CHECKERS_MAX_ROUNDS-rounds); } } int whiteMen,blackMen,whiteKings,blackKings; //countPieces(gi.board,whiteMen,blackMen,whiteKings,blackKings); countPieces(b,whiteMen,blackMen,whiteKings,blackKings); if (ind1==individualWhite) { rewards.first += (2 * (whiteMen) ); rewards.first += (3 * (whiteKings) ); rewards.second += (2 * (blackMen) ); rewards.second += (3 * (blackKings) ); } else { rewards.first += (2 * (blackMen) ); rewards.first += (3 * (blackKings) ); rewards.second += (2 * (whiteMen) ); rewards.second += (3 * (whiteKings) ); } } #if DEBUG_GAME_ANNOUNCER cout << "Fitness earned: " << rewards.first << " & " << rewards.second << endl; CREATE_PAUSE(""); #endif return rewards; }
/**
 * Evaluates the objective and its gradient for the given weights over the
 * whole evidence set.
 *
 * For every example the robot-local blur features are temporarily appended
 * to posFeatures, the weights are split into position weights (the first
 * featArray.size() entries) and sequence weights (the rest), a reward map
 * is built and forward-backward inference is run with a prior that is 0.0
 * at the example's destination state and -HUGE_VAL elsewhere. One occupancy
 * image per velocity layer is written to ../figures/.
 *
 * @param params In: the current weights. Out: overwritten with the
 *               accumulated gradient (sum over examples of model feature
 *               counts minus path feature counts).
 * @return The negated sum over examples of (path cost - normaliser).
 */
double trajOptimizerplus::eval(vector<double> &params) {

   cout << "IN EVAL "<<itrcount++<<" "<<params.size()<<endl;

   for (int i=0; i < params.size(); i++)
      cout << "PARAMS IN: "<<i<<" "<<params.at(i)<<endl;

   int factor = evidence.getFactor();

   pair<int, int> dims = grid.dims();
   int v_dim = seqFeat.num_V();

   /*
   pair<int, int> lowDims((int)ceil((float)dims.first/factor),
         (int)ceil((float)dims.second/factor));
   */
   vector<vector<vector<double> > >
      prior(dims.first, vector<vector<double> >(dims.second,
               vector<double> (v_dim,-HUGE_VAL)));

   double obj = 0.0;
   vector<double> gradient(params.size(), 0.0);
   vector<vector<vector<double> > > occupancy;
   vector<vector<double> > layerOccupancy;
   layerOccupancy.resize(dims.first,vector<double>(dims.second,-HUGE_VAL));
   vector<double> modelFeats, pathFeats;

   for (int i=0; i < evidence.size(); i++) {
      for (int j=0; j < params.size(); j++) {
         cout << " "<<j<<" "<<params.at(j);
      }
      cout<<endl;

      cout << "Evidence #"<<i<<endl;
      vector<pair<int, int> >& trajectory = evidence.at(i);
      vector<double>& velocityseq = evidence.at_v(i);
      pair<int,int>& bot = evidence.at_bot(i);

      // Robot-local blurred features: appended for this example only and
      // popped again once the feature arrays have been built.
      for (int r=1; r <= NUMROBFEAT; r++) {
         cout << "Adding Robot Feature "<<r<<endl;
         RobotLocalBlurFeature robblurFeat(grid,bot,10*r);
         // RobotGlobalFeature robFeat(grid,bot);
         posFeatures.push_back(robblurFeat);
      }

      cout << " Creating feature array"<<endl;
      FeatureArray featArray2(posFeatures);
      FeatureArray featArray(featArray2, factor);

      for (int rr=1; rr<= NUMROBFEAT; rr++)
         posFeatures.pop_back();

      // Split the weights: position features first, sequence features
      // after. at() makes a too-short params vector fail loudly instead of
      // reading out of bounds.
      vector<double> p_weights,s_weights;
      int itr = 0;
      for (; itr<featArray.size(); itr++)
         p_weights.push_back(params.at(itr));
      for (; itr<params.size(); itr++)
         s_weights.push_back(params.at(itr));

      //cout<<"Params"<<endl;
      Parameters p_parameters(p_weights), s_parameters(s_weights);
      /*
      cout<<featArray.size()<<endl;
      cout<<params.size()<<endl;
      cout<<p_weights.size()<<endl;
      cout<<s_weights.size()<<endl;
      cout<<p_parameters.size()<<endl;
      cout<<s_parameters.size()<<endl;
      */

      //cout<<"Reward"<<endl;
      RewardMap rewards(featArray,seqFeat,p_parameters,s_parameters);
      DisSeqPredictor predictor(grid, rewards, engine);

      // Sum of the rewards along the demonstrated trajectory.
      double cost = 0.0;
      //cout<< trajectory.size()<<endl;
      for (int j=0; j < trajectory.size(); j++) {
         //cout<<j<<" "<<trajectory.at(j).first<<" "<< trajectory.at(j).second<< " "<< seqFeat.getFeat(velocityseq.at(j))<<endl;
         cost+=rewards.at(trajectory.at(j).first, trajectory.at(j).second, seqFeat.getFeat(velocityseq.at(j)));
      }

      State initial(trajectory.front(),seqFeat.getFeat(velocityseq.front()));
      State destination(trajectory.back(),seqFeat.getFeat(velocityseq.back()));
      //for (int k=0;k<v_dim;k++)
      prior.at(destination.x()).at(destination.y()).at(destination.disV) = 0.0;

      cout << "Initial: "<<initial.x()<<" "<<initial.y()<<" "<<initial.disV<<endl;
      cout << "Destination: "<<destination.x()<<" "
         <<destination.y()<<" "<<destination.disV<<endl;
      predictor.setStart(initial);
      predictor.setPrior(prior);

      double norm = predictor.forwardBackwardInference(initial, occupancy);

      // Dump one occupancy image per velocity layer.
      for (int l=0; l<v_dim; l++) {
         BMPFile gridView(dims.first, dims.second);
         for (int x= 0; x<dims.first; x++) {
            for(int y=0; y<dims.second; y++) {
               layerOccupancy.at(x).at(y) = occupancy.at(x).at(y).at(l);
            }
         }

         char buf[1024];
         /*
         RobotGlobalFeature robblurFeat(grid,bot);
         gridView.addBelief(robblurFeat.getMap(), 0.0, 25, white, red);
         gridView.addVector(trajectory, blue, factor);
         gridView.addLabel(bot,green);
         sprintf(buf, "../figures/feat%04d_%d.bmp",i,l);
         gridView.write(buf);
         */

         gridView.addBelief(layerOccupancy, -300.0, 5.0, white, red);
         //grid.addObstacles(gridView, black);
         gridView.addLabel(bot,green);
         gridView.addVector(trajectory, blue, factor);

         snprintf(buf, sizeof(buf), "../figures/train%04d_%d.bmp",i,l);
         gridView.write(buf);
      }

      /*
      for (int i=0; i < occupancy.size(); i++)
         for (int j=0; j < occupancy.at(i).size(); j++)
            if (occupancy.at(i).at(j) > -10)
               cout << i <<" "<<j<<" "<<occupancy.at(i).at(j)<<endl;
      */
      featArray.featureCounts(occupancy, modelFeats);

      featArray.featureCounts(trajectory, pathFeats);

      seqFeat.featureCounts_vec(occupancy,modelFeats);
      seqFeat.featureCounts_vec(velocityseq,pathFeats);

      for (int k=0; k < params.size(); k++) {
         double diff = pathFeats.at(k) - modelFeats.at(k);
         gradient.at(k) -= diff;
         cout <<" Gradient ("<< k << " -grad: "<< gradient.at(k) <<" -path: "<<
            pathFeats.at(k)<<" -model: "<< modelFeats.at(k)<<")";
      }
      cout<<endl;
      cout << "OBJ: "<<cost-norm<< " "<<cost<<" "<<norm<<endl;
      obj += (cost - norm);
      /* obj is the path probability
       * cost is the sum of rewards: sum f(s,a)
       * norm is V(s_1->G), since here s_T = G, V(s_T->G) = 0*/

      // Restore the prior so the next example starts from all -HUGE_VAL.
      prior.at(destination.x()).at(destination.y()).at(destination.disV)
         = -HUGE_VAL;
   }

   cout << "RETURN OBJ: "<<-obj<<endl;

   // The gradient is handed back to the caller through params.
   params = gradient;

   return -obj;
}
double trajectoryOptimizer::eval(vector<double> ¶ms, vector<double> &gradient) { cout << "IN EVAL "<<params.size()<<endl; for (int i=0; i < params.size(); i++) cout << "PARAMS IN: "<<i<<" "<<params.at(i)<<endl; int factor = evidence.getFactor(); // cout << "FACTOR: "<<factor<<endl; FeatureArray featArray2(features); FeatureArray featArray(featArray2, factor); //cout<<"Dims featarray "<<featArray.dims().first<<" "<<featArray.dims().second<<endl; Parameters parameters(params); //cout << "Calculating rewards"<<endl; RewardMap rewards(featArray, parameters); pair<int, int> dims = grid.dims(); BMPFile gridView(dims.first, dims.second); pair<int, int> lowDims((int)ceil((float)dims.first/factor), (int)ceil((float)dims.second/factor)); //cout << "Computing prior"<<endl; vector<vector<double> > prior(lowDims.first, vector<double>(lowDims.second, -HUGE_VAL)); double obj = 0.0; gradient.clear(); gradient.resize(params.size(), 0.0); for (int i=0; i < evidence.size(); i++) { Predictor predictor(grid, rewards, engine); cout << "Evidence #"<<i<<endl; vector<pair<int, int> > trajectory = evidence.at(i); double cost = 0.0; for (int j=0; j < trajectory.size(); j++){ double temp = rewards.at(trajectory.at(j).first, trajectory.at(j).second); cost += temp; } pair<int, int> initial = trajectory.front(); pair<int, int> destination = trajectory.back(); prior.at(destination.first).at(destination.second) = 0.0; #if 0 cout << "Initial: "<<initial.first<<" "<<initial.second<<endl; cout << "Destination: "<<destination.first<<" " <<destination.second<<endl; #endif predictor.setStart(initial); predictor.setPrior(prior); vector<vector<double> > occupancy; double norm = predictor.predict(initial, occupancy); gridView.addBelief(occupancy, -300.0, 0.0, white, red); gridView.addVector(trajectory, blue, factor); char buf[1024]; sprintf(buf, "../figures/train%04d.bmp", i); gridView.write(buf); vector<double> modelFeats, pathFeats; //cout << "Computing feature counts"<<endl; /* for (int i=0; i < 
occupancy.size(); i++) for (int j=0; j < occupancy.at(i).size(); j++) if (occupancy.at(i).at(j) > -10) cout << i <<" "<<j<<" "<<occupancy.at(i).at(j)<<endl; */ featArray.featureCounts(occupancy, modelFeats); featArray.featureCounts(trajectory, pathFeats); cout << "GRADIENT"<<endl; for (int k=0; k < params.size(); k++) { double diff = pathFeats.at(k) - modelFeats.at(k); gradient.at(k) -= diff; cout << k << ": " << gradient.at(k) << " " << pathFeats.at(k) << " " << modelFeats.at(k) <<endl; } cout << "OBJ: "<<cost-norm<<endl; cout << " "<<cost<<" "<<norm<<endl; obj += (cost - norm); prior.at(destination.first).at(destination.second) = -HUGE_VAL; } cout << "RETURN OBJ: "<<-obj<<endl; return -obj; }