void AgentBG::ResetEpisode()
{
    _m_t = 0;

    //reset the previous joint belief to the initial state distribution
    JointBeliefInterface* jbi = GetPU()->GetNewJointBeliefFromISD();
    _m_prevJB.Set(jbi->Get());
    delete jbi;

    //clear the action and observation histories of the previous episode
    _m_oIs.clear();
    _m_prevJoIs.clear();
    _m_prevJaIs.clear();
    _m_aIs.clear();
}
double BayesianGameForDecPOMDPStage::ComputeImmediateReward(Index jtI, Index jaI) const
{
    //the expected immediate reward of joint action jaI for joint type jtI
    //is the reward averaged over the joint belief associated with jtI
    JointBeliefInterface* jb = _m_JBs.at(jtI);
    BeliefIteratorGeneric bit = jb->GetIterator();
    double r = 0.0;
    do {
        Index s = bit.GetStateIndex();
        double p = bit.GetProbability();
        r += p * _m_pu->GetReward(s, jaI);
    } while(bit.Next());
    return r;
}
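// The loop above is just the expectation E_b[R(s,a)] = sum_s b(s) R(s,a),
// taken over the belief's entries. A minimal standalone sketch of the same
// computation, assuming a dense std::vector<double> belief and a dense
// reward table (hypothetical names for exposition, not the MADP API):

#include <cstddef>
#include <vector>

// Hypothetical dense reward table: reward[s][a] = R(s, a).
double ExpectedImmediateReward(const std::vector<double>& belief,
                               const std::vector<std::vector<double>>& reward,
                               std::size_t jointAction)
{
    double r = 0.0;
    for (std::size_t s = 0; s < belief.size(); ++s)
        r += belief[s] * reward[s][jointAction]; // E_b[R(s,a)]
    return r;
}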
AlphaVector::BGPolicyIndex AgentBG::GetMaximizingBGIndex(const JointBeliefInterface &jb) const
{
    double v = -DBL_MAX, q;
    AlphaVector::BGPolicyIndex bI, betaMaxI = INT_MAX;

    //find the joint action whose stationary QBG value is maximal for this
    //belief, and return the index of the BG policy that attains it
    for(Index a = 0; a != GetPU()->GetNrJointActions(); ++a)
    {
        q = _m_QBGstationary->GetQ(jb, a, bI);
        if(q > v)
        {
            v = q;
            betaMaxI = bI;
        }
    }
#if DEBUG_AgentBG
    cout << "GetMaximizingBGIndex " << GetIndex() << ": betaMaxI " << betaMaxI
         << " " << jb.SoftPrint() << endl;
#endif
    return(betaMaxI);
}
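// Structurally this is an argmax over joint actions that carries along a
// secondary payload (the BG policy index filled in by the Q evaluator).
// A generic sketch of that pattern, with a hypothetical evaluator type
// (illustration only, not the MADP API):

#include <cstddef>
#include <limits>
#include <utility>

// Hypothetical evaluator: returns (Q-value, payload index) for an action.
using Evaluator = std::pair<double, std::size_t> (*)(std::size_t action);

std::size_t ArgMaxPayload(std::size_t nrActions, Evaluator evalQ)
{
    double best = -std::numeric_limits<double>::infinity();
    std::size_t bestPayload = std::numeric_limits<std::size_t>::max();
    for (std::size_t a = 0; a < nrActions; ++a)
    {
        std::pair<double, std::size_t> qp = evalQ(a);
        if (qp.first > best) // strict '>' keeps the first maximizer on ties
        {
            best = qp.first;
            bestPayload = qp.second;
        }
    }
    return bestPayload;
}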
/**Calculates the jaohI corresponding to jaI_arr and joI_arr and also
 * returns P(jaohI) and the expected obtained reward for previous
 * time steps GIVEN this joint action-observation history.
 *
 * \todo TODO add following to .h
 *
 * Basically this function is a form of
 * PlanningUnitMADPDiscrete::GetJAOHProbs(Recursively)
 * that also computes the reward.
 */
void BayesianGameForDecPOMDPStage::ProbRewardForjoahI(
    //input args
    Index ts, Index jtI, Index* jaI_arr, Index* joI_arr,
    //output args
    Index& jaohI, double& PjaohI, double& ExpR_0_prevTS_thisJAOH)
{
    //first we get the initial jaoh
    JointActionObservationHistoryTree* jaoht =
        _m_pu->GetJointActionObservationHistoryTree(0);

    double CPjaohI = 1.0;
    PjaohI = CPjaohI; // == 1.0

    //get the initial belief
    JointBeliefInterface* jb = _m_pu->GetNewJointBeliefFromISD();

    //note: ExpR_0_prevTS_thisJAOH is accumulated into (+=) below, so the
    //caller is expected to initialize it
    Index tI = 0;
    while(tI < ts)
    {
        //calculate the expected immediate reward for time step tI
        double ExpR_0_prevTS_thisJAOH_thisT = 0.0;
#if USE_BeliefIteratorGeneric
        BeliefIteratorGeneric it = jb->GetIterator();
        do {
            double R_si_ja = _m_pu->GetReward(it.GetStateIndex(), jaI_arr[tI]);
            if(DEBUG_BG4DECPOMDP4)
                cout << "R(s=" << it.GetStateIndex() << ",ja=" << jaI_arr[tI]
                     << ")=" << R_si_ja << "\n";
            ExpR_0_prevTS_thisJAOH_thisT += it.GetProbability() * R_si_ja;
        } while(it.Next());
#else
        for(Index sI = 0; sI < _m_pu->GetNrStates(); sI++)
        {
            double R_si_ja = _m_pu->GetReward(sI, jaI_arr[tI]);
            if(DEBUG_BG4DECPOMDP4)
                cout << "R(s=" << sI << ",ja=" << jaI_arr[tI] << ")="
                     << R_si_ja << "\n";
            ExpR_0_prevTS_thisJAOH_thisT += jb->Get(sI) * R_si_ja;
        }
#endif
        ExpR_0_prevTS_thisJAOH += ExpR_0_prevTS_thisJAOH_thisT;

        if(DEBUG_BG4DECPOMDP4)
        {
            cout << "calculating expected reward R(oaHist,a) for tI=" << tI
                 << " oaHist:";
            jaoht->GetJointActionObservationHistory()->Print();
            cout << endl;
            cout << "R(b,a) (exp reward for jtI=" << jtI << ", tI=" << tI
                 << ") is " << ExpR_0_prevTS_thisJAOH_thisT << endl;
        }

        //advance the history, then update the belief; the update returns
        //P(o_tI | b_tI, a_tI), the next factor in the chain rule for P(jaohI)
        jaoht = jaoht->GetSuccessor(jaI_arr[tI], joI_arr[tI]);
        jaohI = jaoht->GetIndex();

        CPjaohI = jb->Update(*_m_pu->GetReferred(), jaI_arr[tI], joI_arr[tI]);
        PjaohI = PjaohI * CPjaohI;

        tI++;
    }
    delete jb; //free the belief allocated with 'new'

    if(DEBUG_BG4DECPOMDP4)
    {
        cout << "expected previous reward (up to ts-1) for (jtI=" << jtI << ") ";
        jaoht->GetJointActionObservationHistory()->Print();
        cout << " is " << ExpR_0_prevTS_thisJAOH << endl << endl;
    }
}
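// ProbRewardForjoahI interleaves two recursions: the exact belief update and
// the chain rule P(jaohI) = prod_t P(o_t | b_t, a_t), where each factor is
// the normalization constant of the belief update (the CPjaohI above). A
// self-contained sketch of one such step, assuming hypothetical dense T and
// O tables (illustration only, not the MADP API):

#include <cstddef>
#include <vector>

// Hypothetical dense model tables:
//   T[s][a][s1] = P(s1 | s, a),  O[a][s1][o] = P(o | a, s1).
struct Model {
    std::vector<std::vector<std::vector<double>>> T, O;
};

// One exact belief update, b1(s1) = O(a,s1,o) * sum_s T(s,a,s1) b(s) / P(o|b,a).
// Returns the normalization constant P(o | b, a), which is exactly the
// per-step factor multiplied into PjaohI above.
double UpdateBelief(std::vector<double>& b, const Model& m,
                    std::size_t a, std::size_t o)
{
    std::vector<double> next(b.size(), 0.0);
    double Po = 0.0;
    for (std::size_t s1 = 0; s1 < b.size(); ++s1)
    {
        double mass = 0.0;
        for (std::size_t s = 0; s < b.size(); ++s)
            mass += m.T[s][a][s1] * b[s]; // predicted next-state probability
        next[s1] = m.O[a][s1][o] * mass;  // weight by observation likelihood
        Po += next[s1];
    }
    if (Po > 0.0)
        for (double& p : next) p /= Po;   // normalize to a proper belief
    b = next;
    return Po;
}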
/// Export is in Tony Cassandra's POMDP file format.
void AlphaVectorPlanning::ExportPOMDPFile(const string & filename,
                                          const PlanningUnitDecPOMDPDiscrete *pu)
{
    int nrA = pu->GetNrJointActions(),
        nrO = pu->GetNrJointObservations(),
        nrS = pu->GetNrStates();

    ofstream fp(filename.c_str());
    if(!fp)
    {
        cerr << "AlphaVectorPlanning::ExportPOMDPFile: failed to open file "
             << filename << endl;
        return;
    }

    //preamble: discount, value type, and state/action/observation names
    fp << "discount: " << pu->GetDiscount() << endl;
    switch(pu->GetReferred()->GetRewardType())
    {
    case REWARD:
        fp << "values: reward" << endl;
        break;
    case COST:
        fp << "values: cost" << endl;
        break;
    }
    fp << "states:";
    for(int s = 0; s < nrS; s++)
        fp << " " << pu->GetReferred()->GetState(s)->SoftPrintBrief();
    fp << endl;
    fp << "actions:";
    for(int a = 0; a < nrA; a++)
        fp << " " << pu->GetReferred()->GetJointAction(a)->SoftPrintBrief();
    fp << endl;
    fp << "observations:";
    for(int o = 0; o < nrO; o++)
        fp << " " << pu->GetReferred()->GetJointObservation(o)->SoftPrintBrief();
    fp << endl;

    //initial state distribution
    JointBeliefInterface* isd = pu->GetNewJointBeliefFromISD();
    fp << "start: ";
    for(int s0 = 0; s0 < nrS; s0++)
        fp << isd->Get(s0) << " ";
    fp << endl;
    delete isd;

    //transition, observation, and reward models; zero entries are omitted
    double p;
    for(int a = 0; a < nrA; a++)
        for(int s0 = 0; s0 < nrS; s0++)
            for(int s1 = 0; s1 < nrS; s1++)
            {
                p = pu->GetTransitionProbability(s0, a, s1);
                if(p != 0)
                    fp << "T: " << a << " : " << s0 << " : " << s1
                       << " " << p << endl;
            }
    for(int a = 0; a < nrA; a++)
        for(int o = 0; o < nrO; o++)
            for(int s1 = 0; s1 < nrS; s1++)
            {
                p = pu->GetObservationProbability(a, s1, o);
                if(p != 0)
                    fp << "O: " << a << " : " << s1 << " : " << o
                       << " " << p << endl;
            }
    for(int a = 0; a < nrA; a++)
        for(int s0 = 0; s0 < nrS; s0++)
        {
            p = pu->GetReward(s0, a);
            if(p != 0)
                fp << "R: " << a << " : " << s0 << " : * : * "
                   << p << endl;
        }
}
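// For reference, the exported file for a hypothetical 2-state, 2-action,
// 2-observation problem would look like the following (numbers made up for
// illustration). The model lines follow Cassandra's syntax as emitted above:
// "T: a : s : s' p", "O: a : s' : o p", and "R: a : s : s' : o r", with the
// reward wildcarded over successor state and observation since this code
// only exports R(s,a):
//
//   discount: 0.95
//   values: reward
//   states: s0 s1
//   actions: a0 a1
//   observations: o0 o1
//   start: 0.5 0.5
//   T: 0 : 0 : 0 0.1
//   T: 0 : 0 : 1 0.9
//   ...
//   O: 0 : 0 : 0 0.8
//   O: 0 : 0 : 1 0.2
//   ...
//   R: 0 : 0 : * : * 1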