/// Create an index/value pair for jp: instead of storing the joint
/// policy object itself, only its index, its depth and the problem
/// interface are recorded; the cached policy pointer starts out null.
JPPVIndexValuePair::JPPVIndexValuePair(const JointPolicyPureVector& jp,
                                       double value)
    : JointPolicyValuePair(value)
{
    _m_jpolIndex = jp.GetIndex();
    _m_jpolDepth = jp.GetDepth();
    _m_pu = jp.GetInterfacePTPDiscretePure();
    _m_jpol = 0; // no cached policy object yet
}
double BayesianGameIdenticalPayoffSolver:: Evaluate(const JointPolicyPureVector & jpolBG) const { boost::shared_ptr<const BayesianGameIdenticalPayoffInterface> bgip=GetBGIPI(); const BayesianGameIdenticalPayoffInterface *bgipRawPtr=bgip.get(); double v = 0.0; for(Index jt = 0; jt < bgipRawPtr->GetNrJointTypes(); jt++) { Index jaI = jpolBG.GetJointActionIndex(jt); double p = bgipRawPtr->GetProbability(jt); double u = bgipRawPtr->GetUtility(jt, jaI); v += p * u; } return v; }
void JESPExhaustivePlanner::Plan() { if(DEBUG_EXJESP) { cout << "\n---------------------------------"<<endl; cout << "Exhaustive JESP - Plan() started"<<endl; cout << "---------------------------------"<<endl; } double v_best = -DBL_MAX; JointPolicyPureVector *jpol = new JointPolicyPureVector(this); JointPolicyPureVector *best = new JointPolicyPureVector(this); jpol->RandomInitialization(); if(DEBUG_EXJESP) { cout << "joint policy randomly initialized to:"; jpol->Print(); } int stop = 0; size_t nr_non_improving_agents = 0; while(nr_non_improving_agents < GetDPOMDPD()->GetNrAgents() && stop++ < 1000) { int agentI = GetNextAgentIndex(); double v = ExhaustiveBestResponse(jpol, agentI); if(v > v_best + 1e-9) { (*best) = (*jpol); if(DEBUG_EXHBR) { cout << "Plan: new best policy:"<<endl; best->Print(); } v_best = v; nr_non_improving_agents = 0; } else nr_non_improving_agents++; } _m_foundPolicy = JPPV_sharedPtr(best); _m_expectedRewardFoundPolicy=v_best; if(DEBUG_EXJESP) { cout << "Exhaustive JESP - resulting policy:"<<endl; cout << "------------------------------------"<<endl; best->Print(); } }
/* this is the high-level pseudo-code for what happens:

    start with a horizon 0 joint policy - i.e. specifying 0 actions
    JPolValPool.push( <jpol,val=0.0> )
    do
        ppi = <pol,val> = JPolValPool.GetNext()
        //poolOfNextPolicies = {<pol,vals>}
        //isLowerBound = bool - whether the vals are lower bounds to the
        // optimal value (i.e. value for the optimal policy)
        <poolOfNextPolicies, isLowerBound> = ConstructAndValuateNextPolicies(ppi)
        if(isLowerBound)
            Prune( JPolValPool, max(lowerBound) )
        poolOfNextPolicies = SelectPoliciesToProcessFurther(poolOfNextPolicies);
        JPolValPool.insert(poolOfNextPolicies)
    while !empty JPolValPool
*/
/** Top-level GMAA* search loop. Repeatedly selects a partial joint
 *  policy from the pool, expands it via ConstructAndValuateNextPolicies,
 *  tracks the best lower bound (maxLB / bestJPol) found so far, prunes
 *  the pool against it, and stops when the pool is empty or the selected
 *  heuristic value (+ slack) drops below the best lower bound.
 *  Results are stored in _m_foundPolicy / _m_expectedRewardFoundPolicy. */
void GeneralizedMAAStarPlanner::Plan()
{
    // discard any policy left over from a previous call to Plan()
    if( _m_foundPolicy != 0)
        delete _m_foundPolicy;
    StartTimer("GMAA::Plan");
    //stuff for timing (if used)
    tms ts_start;       //the time struct
    clock_t tck_start;  //ticks
    tck_start = times(&ts_start);
    //the intermediate timing stream
    ofstream & its = *_m_intermediateResultFile;
    //this counter maintains the maximum policy pool size.
    _m_maxJPolPoolSize = 0;

    // maxLB: best (maximum) lower bound found so far; bestJPol is the
    // policy that attains it.
    double maxLB = -DBL_MAX;
    PartialJointPolicyDiscretePure * bestJPol = NewJPol();
    bestJPol->SetPastReward(-DBL_MAX);
#if DEBUG_GMAA_POLS
    cout << "GMAA initialized with empty policy:"<<endl;
    bestJPol->Print();
#endif
    // the policy pool ("JPolValPool" in the pseudo-code above)
    PartialPolicyPoolInterface * pp_p = NewPP();
    pp_p->Init( GetThisFromMostDerivedPU() ); //initialize with empty joint policy
    do
    {
        StartTimer("GMAA::Plan::iteration");
        if(_m_saveIntermediateTiming)
            SaveTimers(_m_intermediateTimingFilename);
        if(_m_verboseness >= 2)
        {
            cout << "\n---------------------------------------------------\n";
            cout << "-->>Start of new GMAA iteration, polpool size="<<
                pp_p->Size()<<"<<--"<<endl;
        }
        // select the next partial policy to expand ("ppi" in pseudo-code)
        PartialPolicyPoolItemInterface* ppi = pp_p->Select();
        PartialJointPolicyDiscretePure * jpol_sel = ppi->GetJPol();
        double v_sel = ppi->GetValue();
        size_t depth_sel = jpol_sel->GetDepth();
        if(_m_verboseness >= 3)
        {
            cout << "Select returned the following policy to expand:\n";
            ppi->GetJPol()->Print();
            cout << "of depth="<< depth_sel << " and heur. val="<<v_sel<<endl;
        }
        if( (v_sel + _m_slack) < maxLB) //the highest upperbound < the best lower
        {
            //TODO:
            // 1)if JPolValPool is no priority queue, this should be changed.
            // 2)this is only necessary, because PRUNE (see todo below) is nyi
            if(_m_verboseness >= 0)
                cout<<"highest upper < best found lower bound, stopping\n";
            break;
        }
        //poolOfNextPolicies = {<pol,vals>}
        //isLowerBound = bool - whether the vals are lower bounds to the
        // optimal value (i.e. value for the optimal policy)
        //<poolOfNextPolicies,isLowerBound>=ConstructAndValuateNextPolicies(ppi)
        PartialPolicyPoolInterface * poolOfNextPolicies = NewPP();
        bool are_LBs = ConstructAndValuateNextPolicies(ppi, poolOfNextPolicies);

        //Clean up ppi - it has been expanded, so remove it from the pool
        if(pp_p->Size() > 0) //should always be true
        {
            pp_p->Pop();
            delete ppi;
        }
        else //should not happen
            throw E("GeneralizedMAAStarPlanner.cpp:policy pool empty? - should not happen?");

#if DEBUG_GMAA4
        if(DEBUG_GMAA4){
            cout << "\n>>>The next policies found, poolOfNextPolicies:"<<endl;
            // iterate over a copy, since Select/Pop consume the pool
            PartialPolicyPoolInterface* pp_copy = NewPP();
            *pp_copy = *poolOfNextPolicies;
            while(! pp_copy->Empty())
            {
                PartialPolicyPoolItemInterface* it = pp_copy->Select();
                it->Print();
                cout << endl;
                pp_copy->Pop();
            }
        }
#endif
        //if(isLowerBound)
        //    Prune( JPolValPool, max(lowerBound) )
        if(are_LBs && poolOfNextPolicies->Size() > 0)
        {
            // the newly found values are lower bounds, so the best of
            // them may improve maxLB / bestJPol
            PartialPolicyPoolItemInterface* bestRanked_ppi = poolOfNextPolicies->
                GetBestRanked();
            poolOfNextPolicies->PopBestRanked();
            double bestNextVal = bestRanked_ppi->GetValue();
            if(bestNextVal > maxLB) //new best lowerbound (and policy) found
            {
                maxLB = bestNextVal;
                *bestJPol = *(bestRanked_ppi->GetJPol());
                if(_m_verboseness >= 2)
                {
                    cout << "new bestJPol (and max. lowerbound) found!" << endl;
                    cout << "Its value v=" << bestNextVal <<" - " <<
                        bestRanked_ppi->GetJPol()->SoftPrintBrief() << endl;
                }
                if(_m_verboseness >= 3)
                    cout << "new bestJPol->SoftPrint():"<<bestJPol->SoftPrint();
                //if we maintain the internal timings...
                if(_m_intermediateResultFile != 0)
                {
                    // record <elapsed ticks, new lower bound>
                    tms ts_cur;
                    clock_t tck_cur;
                    tck_cur = times(&ts_cur);
                    clock_t diff = tck_cur - tck_start;
                    its << diff << "\t" << maxLB << endl;
                }
                // prune JPolValPool
                pp_p->Prune(maxLB - _m_slack );
            }
            delete bestRanked_ppi;
        }
        // keep only the next policies worth expanding, then merge them
        // into the main pool (Union empties poolOfNextPolicies)
        SelectPoliciesToProcessFurther(poolOfNextPolicies, are_LBs, maxLB - _m_slack);
        pp_p->Union(poolOfNextPolicies);
        delete poolOfNextPolicies;

        if( _m_maxJPolPoolSize < pp_p->Size())
            _m_maxJPolPoolSize = pp_p->Size();

        StopTimer("GMAA::Plan::iteration");
        if(_m_verboseness >= 2)
        {
            cout << "\nGMAA::Plan::iteration ending, best policy found so far:";
            cout << endl << bestJPol->SoftPrintBrief() <<endl;
            if(_m_verboseness >= 3)
                cout << endl << bestJPol->SoftPrint() <<endl;
        }
    } while(! pp_p->Empty() ); //<- end do...while

    //we don't want to do any conversions here... takes (sometimes too much)
    //time...
    _m_foundPolicy=bestJPol; //->ToJointPolicyPureVector());
    _m_expectedRewardFoundPolicy=maxLB;

    if(_m_verboseness >= 1)
    {
        cout << "\nGMAA::Plan ending, best policy found: ";
        cout << bestJPol->SoftPrintBrief() << " = " <<endl;
        if(_m_verboseness >= 3)
            cout << _m_foundPolicy->SoftPrint() << endl;
        cout << endl;
#if 0
        JointPolicyPureVector* jppv = _m_foundPolicy->ToJointPolicyPureVector();
        jppv->Print();
#endif
    }
    if(_m_verboseness >= 2)
        cout << "\n\n ";
    if(_m_verboseness >= 0)
        cout << "GMAA::Plan GMAA ENDED"<<endl;
    if(_m_verboseness >= 2)
        cout << "\n\n ";

    delete pp_p;
    StopTimer("GMAA::Plan");
}