int main(int argc, char **argv) { ArgumentHandlers::Arguments args; argp_parse (&ArgumentHandlers::theArgpStruc, argc, argv, 0, 0, &args); int restarts = args.nrRestarts; srand(time(0)); int horizon=args.horizon; try { //start timers Timing Time; Time.Start("Overall"); DecPOMDPDiscreteInterface & decpomdp = * GetDecPOMDPDiscreteInterfaceFromArgs(args); //set the filename etc. string filename="/dev/null",timingsFilename="/dev/null"; ofstream of; stringstream ss; ss << directories::MADPGetResultsFilename("JESP",decpomdp,args) << SoftPrint(args.jesp) //the jesp type << "_h" << horizon << "_JESPrestarts"<< restarts; //check the method specific arguments and add them to file name switch(args.jesp) { case JESPtype::JESPExhaustive: break; case JESPtype::JESPDP: break; } if(!args.dryrun) { filename=ss.str(); timingsFilename=filename + "_Timings"; of.open(filename.c_str()); if(!of) { cout << "JESP: could not open " << filename << endl; cout << "Results will not be stored to disk." << endl; args.dryrun=true; } if(!args.dryrun) { cout << "Computing " << ss.str() << endl; //write headers of << "#horiz."<<"\t"; of << "value " <<"\t"; of << "ticks"<< "\t"; of << "utime" <<"\t"; of << "found jpol index\t(1tick=1/"<<sysconf(_SC_CLK_TCK)<<"s)\n"; of.flush(); } } //Initialization of the planner with typical options for JESP: Time.Start("PlanningUnit"); PlanningUnitMADPDiscreteParameters params; params.SetComputeAll(true); params.SetComputeJointActionObservationHistories(false); params.SetComputeJointObservationHistories(false); params.SetComputeJointBeliefs(false); if(args.sparse) params.SetUseSparseJointBeliefs(true); else params.SetUseSparseJointBeliefs(false); PlanningUnitDecPOMDPDiscrete* jesp = 0; if(args.jesp == JESPtype::JESPExhaustive) { jesp = new JESPExhaustivePlanner (params,horizon,&decpomdp); cout << "JESPExhaustivePlanner initialized" << endl; } else if(args.jesp == JESPtype::JESPDP) { jesp = new JESPDynamicProgrammingPlanner (params,horizon,&decpomdp); cout << "JESPDynamicProgrammingPlanner initialized" << endl; } Time.Stop("PlanningUnit"); cout << "JESP Planner initialized" << endl; for(int restartI = 0; restartI < restarts; restartI++) { //start all timers: Time.Start("Plan"); tms ts_before, ts_after; clock_t ticks_before, ticks_after; ticks_before = times(&ts_before); jesp->Plan(); double V = jesp->GetExpectedReward(); if(args.verbose >= 0) { cout << "value="<< V << endl; if(args.verbose) { jesp->GetJointPolicyPureVector()->Print(); cout << endl; } } //stop all timers ticks_after = times(&ts_after); clock_t ticks = ticks_after - ticks_before; clock_t utime = ts_after.tms_utime - ts_before.tms_utime; Time.Stop("Plan"); #if CHECK_RESULT ValueFunctionDecPOMDPDiscrete vf(jesp, jesp->GetJointPolicyPureVector()); double v = vf.CalculateV(true); cout << "Validated value (exact/approx):="<<v; SimulationDecPOMDPDiscrete sim(*jesp, 1000); SimulationResult simres = sim.RunSimulations( jesp->GetJointPolicyPureVector() ); v = simres.GetAvgReward(); cout << " / "<<v <<endl; #endif if(!args.dryrun) { of << horizon<<"\t"; char formvalue[10]; sprintf(formvalue, "%.6f", V); of << formvalue <<"\t"; of << ticks <<"\t"; of << utime <<"\t"; of << jesp->GetJointPolicyPureVector()->GetIndex() <<"\n"; of.flush(); } } delete jesp; Time.Stop("Overall"); if(args.verbose >= 0) { Time.PrintSummary(); } if(args.saveTimings && !args.dryrun) Time.Save(timingsFilename); } catch(E& e){ e.Print(); } }
int main(int argc, char **argv) { DecPOMDPDiscreteInterface* decpomdp; try { ArgumentHandlers::Arguments args; argp_parse (&ArgumentHandlers::theArgpStruc, argc, argv, 0, 0, &args); Timing times; times.Start("Parsing"); //DecPOMDPDiscreteInterface* decpomdp = GetDecPOMDPDiscreteInterfaceFromArgs(args); TransitionObservationIndependentMADPDiscrete *toi=0; if((toi=dynamic_cast<TransitionObservationIndependentMADPDiscrete*>(decpomdp)) && args.qheur==eQMDP && !args.cache_flat_models /* otherwise * GetDecPOMDPDiscreteInterfaceFromArgs * already caches the flat * models */) { // we don't need a centralized obs model toi->CreateCentralizedSparseTransitionModel(); } times.Stop("Parsing"); if(!args.dryrun) directories::MADPCreateResultsDir("GMAA",*decpomdp); size_t horizon; if(args.infiniteHorizon) horizon=MAXHORIZON; else horizon=args.horizon; times.Start("Overall"); PlanningUnitMADPDiscreteParameters params; #if 0 // Caching doesn't seem worth the trouble if we're computing // just one thing (not to mention the memory savings) if(Qheur==eQMDP) // don't need any of this for solving the MDP params.SetComputeAll(false); else { params.SetComputeAll(true); params.SetUseSparseJointBeliefs(true); } #else params.SetComputeAll(false); if(args.sparse) params.SetUseSparseJointBeliefs(true); #endif times.Start("PlanningUnit"); NullPlanner np(params,horizon,decpomdp); times.Stop("PlanningUnit"); struct timeval tvStart, tvEnd; gettimeofday (&tvStart, NULL); QFunctionJAOHInterface* q=0; for(int restartI = 0; restartI < args.nrRestarts; restartI++) { // with hybrid heuristics already some computation is done // before Compute(), so start timing now times.Start("ComputeQ"); q = GetQheuristicFromArgs(&np, args); q->Compute(); times.Stop("ComputeQ"); // we want to keep the last q computed if(restartI<(args.nrRestarts-1)) delete q; } gettimeofday (&tvEnd, NULL); clock_t wallclockTime = static_cast<clock_t>(((tvEnd.tv_sec - tvStart.tv_sec) + static_cast<double>(tvEnd.tv_usec-tvStart.tv_usec)/1e6) * sysconf(_SC_CLK_TCK)); cout << "Wallclock: from " << tvStart.tv_sec << "." << tvStart.tv_usec << " until " << tvEnd.tv_sec << "." << tvEnd.tv_usec << " which took " << wallclockTime << " clock ticks" << endl; times.AddEvent("WallclockTime", wallclockTime); if(!args.dryrun) { times.Start("Save"); q->Save(); times.Stop("Save"); if(args.verbose >= 0) cout << "Q saved to " << q->GetCacheFilename() << endl; } times.Stop("Overall"); if(args.verbose >= 0) times.PrintSummary(); if(!args.dryrun) { stringstream ss; ss << directories::MADPGetResultsDir("GMAA",*decpomdp) << "/calculateQheuristic" << q->SoftPrintBrief() << "_h" << horizon; if(decpomdp->GetDiscount()!=1) ss << "_g" << decpomdp->GetDiscount(); ss << "_Timings"; times.Save(ss.str()); if(args.verbose >= 0) cout << "Timings saved to " << ss.str() << endl; } if(horizon!=MAXHORIZON) { double Vjb0=-DBL_MAX; for(Index a=0;a!=np.GetNrJointActions();++a) Vjb0=max(q->GetQ(Globals::INITIAL_JAOHI,a),Vjb0); cout << "Value of jaohI 0 = " << Vjb0 << endl; } delete q; } catch(E& e){ e.Print(); } cout << "cleanup..." << endl; delete decpomdp; }
int main(int argc, char **argv) { cout << "DICE: direct CE Policy Search"<<endl; cout << "-----------------------------"<<endl; // parse the command line arguments ArgumentHandlers::Arguments args; argp_parse (&ArgumentHandlers::theArgpStruc, argc, argv, 0, 0, &args); int restarts = args.nrCERestarts; srand(time(0)); int horizon = args.horizon; cout << "Horizon = " << horizon << endl; try { //start timers Timing Time; Time.Start("Overall"); DecPOMDPDiscreteInterface & decpomdp = * GetDecPOMDPDiscreteInterfaceFromArgs(args); // setup the output file stream string filename="/dev/null",timingsFilename="/dev/null"; ofstream of; stringstream ss; ss << directories::MADPGetResultsFilename("DICEPS",decpomdp,args) << "h" << horizon; // add the CE parameters into the output file name ss << "_CEr" << args.nrCERestarts << "_i" << args.nrCEIterations << "_s" << args.nrCESamples << "_sfu" << args.nrCESamplesForUpdate << "_a" << args.CE_alpha << "_ht" << args.CE_use_hard_threshold << "_evals" << args.nrCEEvalutionRuns; if(!args.dryrun) { directories::MADPCreateResultsDir("DICEPS",decpomdp); filename=ss.str(); timingsFilename=filename + "_Timings"; } of.open(filename.c_str()); if(!of) { cerr << "could not open " << filename << endl; return(1); } cout << "Computing " << ss.str() << endl; //write headers of << "#horiz."<<"\t"; of << "value " <<"\t"; of << "wctime"<< "\t"; of << "utime " <<"\t"; of << "stime " <<"\t"; of << "found jpol index\t(1tick=1/"<<sysconf(_SC_CLK_TCK)<<"s)\n"; of.flush(); //Initialization of the planner with typical options for JESP: Time.Start("PlanningUnit"); PlanningUnitMADPDiscreteParameters params; params.SetComputeAll(true); params.SetComputeJointActionObservationHistories(false); params.SetComputeJointActionHistories(false); params.SetComputeIndividualActionObservationHistories(false); params.SetComputeIndividualActionHistories(false); //params.SetComputeIndividualObservationHistories(false); // joint observations histories are needed for // efficient computation of joint actions params.SetComputeJointObservationHistories(true); params.SetComputeJointBeliefs(false); if(args.sparse) params.SetUseSparseJointBeliefs(true); else params.SetUseSparseJointBeliefs(false); DICEPSPlanner* planner; planner = new DICEPSPlanner (params, &decpomdp, horizon, //CE params args.nrCERestarts, args.nrCEIterations, args.nrCESamples, args.nrCESamplesForUpdate, args.CE_use_hard_threshold, //(gamma in CE papers) args.CE_alpha, //the learning rate args.nrCEEvalutionRuns //the number of evaluation runs , args.verbose ); Time.Stop("PlanningUnit"); cout << "DICEPSPlanner initialized" << endl; clock_t total_utime_diceps=0; double total_value=0; for(int restartI = 0; restartI < restarts; restartI++) { //start all timers: tms ts_before, ts_after; clock_t ticks_before, ticks_after; Time.Start("Plan"); ticks_before = times(&ts_before); planner->Plan(); //stop all timers ticks_after = times(&ts_after); Time.Stop("Plan"); clock_t ticks = ticks_after - ticks_before; clock_t utime = ts_after.tms_utime - ts_before.tms_utime; clock_t stime = ts_after.tms_stime - ts_before.tms_stime; total_utime_diceps+=utime; double V = planner->GetExpectedReward(); if(args.verbose >= 0) { cout << "value="<< V << endl; if(args.verbose) { planner->GetJointPolicyPureVector()->Print(); cout << endl; } } total_value+=V; of << horizon<<"\t"; char formvalue[10]; sprintf(formvalue, "%.6f", V); of << formvalue <<"\t"; of << ticks <<"\t"; of << utime <<"\t"; of << stime <<"\t"; of << "-1\n";//Cannot get index of joint pol., since "planner->GetJointPolicyPureVector()->GetIndex()" does not work of.flush(); // output average statistics after completing the last restart if(restartI==(restarts-1)) of << "# h " << args.horizon<<"\t" << " avg DICEPS time (s): " << (static_cast<double>(total_utime_diceps)/ sysconf(_SC_CLK_TCK))/restarts << " avg value: " << total_value/restarts << endl; } /* clean up */ Time.Stop("Overall"); if(args.verbose >= 0) { Time.PrintSummary(); planner->PrintTimersSummary(); } #if 0 if(//args.saveTimings && !args.dryrun) { Time.Save(timingsFilename); planner->SaveTimers(timingsFilename); } #endif delete planner; } catch(E& e){ e.Print(); } }