//this function extends a previous policy jpolPrevTs for ts-1 with the behavior specified by the policy of the BayesianGame for time step ts (jpolBG). PartialJointPolicyDiscretePure* BayesianGameForDecPOMDPStage::ConstructExtendedPolicy( PartialJointPolicyDiscretePure& jpolPrevTs, JointPolicyDiscretePure& jpolBG, vector<size_t>& nrOHts, vector<Index>& firstOHtsI ) { //check policy types if(jpolPrevTs.GetIndexDomainCategory() != PolicyGlobals::OHIST_INDEX) throw E("BayesianGameForDecPOMDPStage::ConstructExtendedPolicy --- jpolPrevTs.GetIndexDomainCategory() != PolicyGlobals::OHIST_INDEX "); if(jpolBG.GetIndexDomainCategory() != PolicyGlobals::TYPE_INDEX) throw E("BayesianGameForDecPOMDPStage::ConstructExtendedPolicy --- jpolPrevTs.GetIndexDomainCategory() != PolicyGlobals::TYPE_INDEX "); //construct a policy for the DecPOMDP: //a copy of jpolPrevTs with extended to this time step (ts) by //jpolBG PartialJointPolicyDiscretePure* jpolTs = new PartialJointPolicyPureVector(jpolPrevTs); jpolTs->SetDepth( jpolTs->GetDepth()+1 ); for(Index agentI=0; agentI < GetNrAgents(); agentI++) { for(Index type = 0; type < nrOHts[agentI]; type++) { Index ohI = type + firstOHtsI[agentI]; jpolTs->SetAction(agentI, ohI, jpolBG.GetActionIndex(agentI, type) ); } } return(jpolTs); }
//Computes the factored state / action-observation-history distribution
//(FSAOHDist) induced by the given partial joint policy: the distribution is
//initialized from the factored initial state distribution and then updated
//once per stage for which pJPol specifies actions.
void PlanningUnitFactoredDecPOMDPDiscrete::
ComputeFSAOHDist(
    FactoredStateAOHDistribution* fsaoh,
    const PartialJointPolicyDiscretePure& pJPol) const
{
    const Index depth = pJPol.GetDepth();
    //start from the factored initial state distribution (t=0)
    fsaoh->InitializeFromISD( GetFDPOMDPD()->GetFactoredISD() );
    fsaoh->SanityCheck();
    //a depth-0 policy specifies no actions, so no updates can be performed
    if(depth == 0)
        return;
    //a depth-d policy specifies actions for t=0..d-1, so the distribution
    //can be pushed forward up to t=d: one update per specified stage
    for(Index stage = 0; stage < depth; stage++)
        fsaoh->Update(pJPol);
}
/* this is the high-level pseudo-code for what happens:

    start with a horizon 0 joint policy - i.e. specifying 0 actions
    JPolValPool.push( <jpol,val=0.0> )
    do
        ppi = <pol,val> = JPolValPool.GetNext()
        //poolOfNextPolicies = {<pol,vals>}
        //isLowerBound = bool - whether the vals are lower bounds to the
        //  optimal value (i.e. value for the optimal policy)
        <poolOfNextPolicies, isLowerBound> = ConstructAndValuateNextPolicies(ppi)
        if(isLowerBound)
            Prune( JPolValPool, max(lowerBound) )
        poolOfNextPolicies = SelectPoliciesToProcessFurther(poolOfNextPolicies);
        JPolValPool.insert(poolOfNextPolicies)
    while !empty JPolValPool
*/
//Runs the generalized MAA* best-first search over partial joint policies.
//On return, _m_foundPolicy holds the best (deepest/full) joint policy found
//and _m_expectedRewardFoundPolicy its value (the best lower bound maxLB).
void GeneralizedMAAStarPlanner::Plan()
{
    //discard the result of any previous Plan() call
    if( _m_foundPolicy != 0)
        delete _m_foundPolicy;

    StartTimer("GMAA::Plan");
    //stuff for timing (if used)
    tms ts_start; //the time struct
    clock_t tck_start; //ticks
    tck_start = times(&ts_start);
    //the intermediate timing stream
    //NOTE(review): this reference is bound even when _m_intermediateResultFile
    //is 0; it is only dereferenced below after an explicit 0-check.
    ofstream & its = *_m_intermediateResultFile;

    //this counter maintains the maximum policy pool size.
    _m_maxJPolPoolSize = 0;

    //maxLB is the best lower bound found so far (value of bestJPol)
    double maxLB = -DBL_MAX;
    PartialJointPolicyDiscretePure * bestJPol = NewJPol();
    bestJPol->SetPastReward(-DBL_MAX);
#if DEBUG_GMAA_POLS
    cout << "GMAA initialized with empty policy:"<<endl;
    bestJPol->Print();
#endif

    //the policy pool (open list of the best-first search)
    PartialPolicyPoolInterface * pp_p = NewPP();
    pp_p->Init( GetThisFromMostDerivedPU() ); //initialize with empty joint policy
    do
    {
        StartTimer("GMAA::Plan::iteration");
        if(_m_saveIntermediateTiming)
            SaveTimers(_m_intermediateTimingFilename);
        if(_m_verboseness >= 2)
        {
            cout << "\n---------------------------------------------------\n";
            cout << "-->>Start of new GMAA iteration, polpool size="<<
                pp_p->Size()<<"<<--"<<endl;
        }
        //select the next policy to expand (presumably the one with the
        //highest heuristic value - depends on the NewPP() implementation)
        PartialPolicyPoolItemInterface* ppi = pp_p->Select();
        PartialJointPolicyDiscretePure * jpol_sel = ppi->GetJPol();
        double v_sel = ppi->GetValue();
        size_t depth_sel = jpol_sel->GetDepth();
        if(_m_verboseness >= 3)
        {
            cout << "Select returned the following policy to expand:\n";
            ppi->GetJPol()->Print();
            cout << "of depth="<< depth_sel << " and heur. val="<<v_sel<<endl;
        }

        if( (v_sel + _m_slack) < maxLB) //the highest upperbound < the best lower
        {
            //TODO:
            // 1)if JPolValPool is no priority queue, this should be changed.
            // 2)this is only necessary, because PRUNE (see todo below) is nyi
            if(_m_verboseness >= 0)
                cout<<"highest upper < best found lower bound, stopping\n";
            break;
        }

        //poolOfNextPolicies = {<pol,vals>}
        //isLowerBound = bool - whether the vals are lower bounds to the
        //  optimal value (i.e. value for the optimal policy)
        //<poolOfNextPolicies,isLowerBound>=ConstructAndValuateNextPolicies(ppi)
        PartialPolicyPoolInterface * poolOfNextPolicies = NewPP();
        bool are_LBs = ConstructAndValuateNextPolicies(ppi, poolOfNextPolicies);

        //Clean up ppi - the expanded item is removed from the pool and freed
        if(pp_p->Size() > 0) //should always be true
        {
            pp_p->Pop();
            delete ppi;
        }
        else //should not happen
            throw E("GeneralizedMAAStarPlanner.cpp:policy pool empty? - should not happen?");

#if DEBUG_GMAA4
        if(DEBUG_GMAA4){
            cout << "\n>>>The next policies found, poolOfNextPolicies:"<<endl;
            //NOTE(review): pp_copy is never deleted - debug-only leak
            PartialPolicyPoolInterface* pp_copy = NewPP();
            *pp_copy = *poolOfNextPolicies;
            while(! pp_copy->Empty())
            {
                PartialPolicyPoolItemInterface* it = pp_copy->Select();
                it->Print();
                cout << endl;
                pp_copy->Pop();
            }
        }
#endif
        //if(isLowerBound)
        //    Prune( JPolValPool, max(lowerBound) )
        if(are_LBs && poolOfNextPolicies->Size() > 0)
        {
            //the values are lower bounds: the best-ranked child may improve
            //the incumbent maxLB / bestJPol
            PartialPolicyPoolItemInterface* bestRanked_ppi = poolOfNextPolicies->
                GetBestRanked();
            poolOfNextPolicies->PopBestRanked();
            double bestNextVal = bestRanked_ppi->GetValue();
            if(bestNextVal > maxLB) //new best lowerbound (and policy) found
            {
                maxLB = bestNextVal;
                *bestJPol = *(bestRanked_ppi->GetJPol());
                if(_m_verboseness >= 2)
                {
                    cout << "new bestJPol (and max. lowerbound) found!" << endl;
                    cout << "Its value v=" << bestNextVal <<" - "
                         << bestRanked_ppi->GetJPol()->SoftPrintBrief() << endl;
                }
                if(_m_verboseness >= 3)
                    cout << "new bestJPol->SoftPrint():"<<bestJPol->SoftPrint();
                //if we maintain the internal timings...
                if(_m_intermediateResultFile != 0)
                {
                    //write "<elapsed ticks>\t<best lower bound>" to the
                    //intermediate result stream
                    tms ts_cur;
                    clock_t tck_cur;
                    tck_cur = times(&ts_cur);
                    clock_t diff = tck_cur - tck_start;
                    its << diff << "\t" << maxLB << endl;
                }
                // prune JPolValPool - drop pool items whose heuristic value
                // cannot beat the new lower bound (minus slack)
                pp_p->Prune(maxLB - _m_slack );
            }
            delete bestRanked_ppi;
        }
        //poolOfNextPolicies = SelectPoliciesToProcessFurther(...)
        SelectPoliciesToProcessFurther(poolOfNextPolicies, are_LBs,
                                       maxLB - _m_slack);
        //JPolValPool.insert(poolOfNextPolicies)
        //NOTE(review): Union presumably transfers the items into pp_p, so
        //deleting the (emptied) container afterwards is safe - confirm
        //against the PartialPolicyPoolInterface contract.
        pp_p->Union(poolOfNextPolicies);
        delete poolOfNextPolicies;

        //track the maximum pool size reached during the search
        if( _m_maxJPolPoolSize < pp_p->Size())
            _m_maxJPolPoolSize = pp_p->Size();

        StopTimer("GMAA::Plan::iteration");
        if(_m_verboseness >= 2)
        {
            cout << "\nGMAA::Plan::iteration ending, best policy found so far:";
            cout << endl << bestJPol->SoftPrintBrief() <<endl;
            if(_m_verboseness >= 3)
                cout << endl << bestJPol->SoftPrint() <<endl;
        }
    } while(! pp_p->Empty() ); //<- end do...while

    //we don't want to do any conversions here... takes (sometimes too much)
    //time...
    //ownership of bestJPol is transferred to _m_foundPolicy (freed at the
    //top of the next Plan() call)
    _m_foundPolicy=bestJPol;//->ToJointPolicyPureVector());
    _m_expectedRewardFoundPolicy=maxLB;

    if(_m_verboseness >= 1)
    {
        cout << "\nGMAA::Plan ending, best policy found: ";
        cout << bestJPol->SoftPrintBrief() << " = " <<endl;
        if(_m_verboseness >= 3)
            cout << _m_foundPolicy->SoftPrint() << endl;
        cout << endl;
#if 0
        JointPolicyPureVector* jppv = _m_foundPolicy->ToJointPolicyPureVector();
        jppv->Print();
#endif
    }
    if(_m_verboseness >= 2)
        cout << "\n\n ";
    if(_m_verboseness >= 0)
        cout << "GMAA::Plan GMAA ENDED"<<endl;
    if(_m_verboseness >= 2)
        cout << "\n\n ";

    delete pp_p;
    StopTimer("GMAA::Plan");
}