/**Calculates the jaohI corresponding to jaI_arr and joI_arr and also * returnes the P(jaohI) and the expected obtained reward for previous * time steps GIVEN this joint action history. * * \todo TODO add following to .h * * basically this function is a form of * PlanningUnitMADPDiscrete::GetJAOHProbs(Recursively) * that also computes the reward. * * */ void BayesianGameForDecPOMDPStage::ProbRewardForjoahI( //input args Index ts, Index jtI, Index* jaI_arr,Index* joI_arr, //output args Index& jaohI, double& PjaohI, double& ExpR_0_prevTS_thisJAOH ) { //first we get the initial jaoh JointActionObservationHistoryTree * jaoht = _m_pu->GetJointActionObservationHistoryTree(0); double CPjaohI = 1.0; PjaohI = CPjaohI; // == 1.0 // get the initaal belief JointBeliefInterface* jb = _m_pu->GetNewJointBeliefFromISD(); Index tI = 0; while(tI < ts) { //calculate the R for tI double ExpR_0_prevTS_thisJAOH_thisT = 0.0; #if USE_BeliefIteratorGeneric BeliefIteratorGeneric it=jb->GetIterator(); do { double R_si_ja = _m_pu->GetReward(it.GetStateIndex(), jaI_arr[tI]); if(DEBUG_BG4DECPOMDP4) cout << "R(s="<<it.GetStateIndex()<<",ja="<<jaI_arr[tI]<<")="<< R_si_ja << "\n"; ExpR_0_prevTS_thisJAOH_thisT += it.GetProbability() * R_si_ja; } while(it.Next()); #else for(Index sI=0; sI < _m_pu->GetNrStates(); sI++) { double R_si_ja = _m_pu->GetReward(sI, jaI_arr[tI]); if(DEBUG_BG4DECPOMDP4) cout << "R(s="<<sI<<",ja="<<jaI_arr[tI]<<")="<< R_si_ja << "\n"; ExpR_0_prevTS_thisJAOH_thisT += jb->Get(sI) * R_si_ja; } #endif ExpR_0_prevTS_thisJAOH += ExpR_0_prevTS_thisJAOH_thisT; if(DEBUG_BG4DECPOMDP4) { cout << "calculating expected reward R(oaHist,a) for tI="<<tI <<"oaHist:"; jaoht->GetJointActionObservationHistory()->Print(); cout << endl; cout << "R(b,a) (exp reward for jtI=" << jtI << ", tI="<<tI<<") is "<< ExpR_0_prevTS_thisJAOH_thisT <<endl; } jaoht = jaoht->GetSuccessor( jaI_arr[tI], joI_arr[tI] ); jaohI = jaoht->GetIndex(); CPjaohI = jb->Update( *_m_pu->GetReferred(), jaI_arr[tI], joI_arr[tI] ); PjaohI = PjaohI * CPjaohI; tI++; } delete jb; //free the belief allocated with 'new' if(DEBUG_BG4DECPOMDP4) { cout << "expected previous reward (up to ts-1) for (jtI=" << jtI << ") "; jaoht->GetJointActionObservationHistory()->Print(); cout << " is "<< ExpR_0_prevTS_thisJAOH <<endl << endl; } }