/**
 * Backs up every belief in S for every joint action (no sampling).
 *
 * For each belief S[k] and each joint action a, BeliefBackup() yields one
 * alpha vector. It is appended to Q1[a] unless VectorIsInValueFunction()
 * reports that Q1[a] already contains it.
 *
 * @param S  the belief set to back up.
 * @param Q  the current Q-functions, indexed by joint action.
 * @return   the new Q-functions, holding Q.size() entries.
 */
QFunctionsDiscrete
PerseusBGPlanner::BackupStageAll(const BeliefSet &S,
                                 const QFunctionsDiscrete &Q) const
{
    // Only the size of this vector is used below; it gives the number of
    // beliefs to iterate over.
    const vector<double> oldValues = BeliefValue::GetValues(S, Q);
    const int nrBeliefs = oldValues.size();
    const int nrS = GetPU()->GetNrStates();

    QFunctionsDiscrete Q1(Q.size());
    AlphaVector alpha(nrS);

    ValueFunctionPOMDPDiscrete V = QFunctionsToValueFunction(Q);
    GaoVectorSet Gao = BackupStageLeadIn(V);

    // Vectors produced for the belief currently being processed.
    ValueFunctionPOMDPDiscrete vectorsForBelief;

    for(int beliefIdx = 0; beliefIdx < nrBeliefs; ++beliefIdx)
    {
        vectorsForBelief.clear();

        for(unsigned int action = 0;
            action != GetPU()->GetNrJointActions();
            ++action)
        {
            // Back up this belief for this joint action.
            alpha = BeliefBackup(*S[beliefIdx], action, Gao, V,
                                 _m_backupType);

            // Store the result in Q1, skipping vectors already present.
            if(!VectorIsInValueFunction(alpha, Q1[action]))
            {
                Q1[action].push_back(alpha);
            }

            vectorsForBelief.push_back(alpha);
        }

        if(GetVerbose())
        {
            // Values of all beliefs under the vectors just computed; only
            // the entry of the current belief is reported.
            const vector<double> newValues =
                BeliefValue::GetValues(S, vectorsForBelief);
            cout << "Added vectors for " << beliefIdx << " (V "
                 << newValues[beliefIdx] << ")" << endl;
        }
    }

    BackupStageLeadOut(Gao);

    return Q1;
}
void MonahanPOMDPPlanner::MonahanCrossSum(const GaoVectorSet &G, QFunctionsDiscrete &Q, Index a, bool doIncPrune, size_t maxNrAlphas) const { Index nrS=GetPU()->GetNrStates(), nrO=GetPU()->GetNrJointObservations(); AlphaVector alpha(nrS); #if DEBUG_AlphaVectorPlanning_CrossSum cout << "AlphaVectorPlanning::MonahanCrossSum for action " << a << endl; #endif // initialize with the number of vectors already computed for other time steps size_t nrVectorsComputed=GetNrVectors(); // Do the cross-sums, creates G_a of (3.25) VectorSet Ga=*G[a][0]; for(Index o=1; o!=nrO; o++) { if(doIncPrune) { VectorSet Ga2=CrossSum(Ga,*G[a][o]); #if 0 VectorSet Ga1=Prune(Ga2,maxNrAlphas); Ga=Ga1; #else Ga=Prune(Ga2); #endif } else { VectorSet Ga1=CrossSum(Ga,*G[a][o]); Ga=Ga1; } #if DEBUG_AlphaVectorPlanning_CrossSum if(maxNrAlphas) cout << "AlphaVectorPlanning::MonahanCrossSum nrAlphas " << Ga.size1() << " (max " << maxNrAlphas << ")" << endl; #endif CheckMaxNrVectors(maxNrAlphas,nrVectorsComputed+Ga.size1()); } // Add the resulting vectors to V (HV_n of (3.25)) for(Index k=0; k!=Ga.size1(); ++k) { alpha.SetAction(a); for(Index s=0; s!=nrS; s++) alpha.SetValue(Ga(k,s),s); Q[a].push_back(alpha); nrVectorsComputed++; } if(!doIncPrune) { Q[a]=Prune(Q[a]); CheckMaxNrVectors(maxNrAlphas,nrVectorsComputed+Q[a].size()); // THIS IS WRONG throw(E("MonahanPOMDPPlanner fix error")); if(maxNrAlphas && Q.size()>maxNrAlphas) { stringstream ss; ss << "AlphaVectorPlanning::MonahanCrossSum() too many alpha vectors " << Q.size() << ">" << maxNrAlphas; throw(E(ss.str())); } } }
/**
 * Sampling-based backup stage that draws a fresh belief for every joint
 * action.
 *
 * While some belief is still flagged as not improved, one round is run:
 * for each joint action a, a belief index is drawn with
 * SampleNotImprovedBeliefIndex() and backed up with BeliefBackup(). If the
 * backed-up vector gives that belief a lower value than it had under Q, the
 * maximizing vector from Q[a] is used instead. The chosen vector is added
 * to Q1[a] unless already present. After the round, every flagged belief
 * whose value under the round's vectors is at least its old value is
 * unflagged.
 *
 * @param S  the belief set to improve.
 * @param Q  the current Q-functions, indexed by joint action.
 * @return   the new Q-functions, holding Q.size() entries.
 */
QFunctionsDiscrete
PerseusBGPlanner::BackupStageSamplingAlt(const BeliefSet &S,
                                         const QFunctionsDiscrete &Q) const
{
    // Value of every belief under the old Q-functions.
    const vector<double> oldValues = BeliefValue::GetValues(S, Q);
    const int nrBeliefs = oldValues.size();
    int nrPending = nrBeliefs;
    const int nrS = GetPU()->GetNrStates();
    vector<bool> pending(nrBeliefs, true);

    QFunctionsDiscrete Q1(Q.size());
    AlphaVector alpha(nrS);

    ValueFunctionPOMDPDiscrete V = QFunctionsToValueFunction(Q);
    GaoVectorSet Gao = BackupStageLeadIn(V);

    // Vectors chosen in the current round, and the belief values they give.
    ValueFunctionPOMDPDiscrete roundVectors;
    vector<double> roundValues;

    // Index of the most recently sampled belief.
    int beliefIdx = -1;

    while(nrPending != 0)
    {
        roundVectors.clear();

        for(unsigned int action = 0;
            action != GetPU()->GetNrJointActions();
            ++action)
        {
            // Draw a belief among those not yet improved and back it up.
            beliefIdx = SampleNotImprovedBeliefIndex(pending, nrPending);
            alpha = BeliefBackup(*S[beliefIdx], action, Gao, V,
                                 _m_backupType);

            // Does the new vector at least preserve the value of the
            // sampled belief?
            const double backedUpValue =
                S[beliefIdx]->InnerProduct(alpha.GetValues());
            const bool preservesValue =
                !(backedUpValue < oldValues[beliefIdx]);

            if(preservesValue)
            {
                if(GetVerbose())
                {
                    cout << "Added vector for action " << action
                         << ", belief " << beliefIdx << " (Q "
                         << backedUpValue << " >= "
                         << oldValues[beliefIdx] << ")" << endl;
                }
            }
            else
            {
                // Fall back to a copy from the old value function.
#if DEBUG_PerseusBGPlanner
                cout << "Getting n-1 vector for action " << action
                     << ", belief " << beliefIdx << " ("
                     << backedUpValue << " < "
                     << oldValues[beliefIdx] << ")" << endl;
#endif
                alpha = BeliefValue::GetMaximizingVector(S, beliefIdx,
                                                         Q[action]);
            }

            // Store the result in Q1, skipping vectors already present.
            if(!VectorIsInValueFunction(alpha, Q1[action]))
            {
                Q1[action].push_back(alpha);
            }

            roundVectors.push_back(alpha);
        }

        // Update which beliefs have been improved by this round's vectors.
        roundValues = BeliefValue::GetValues(S, roundVectors);
        int nrImprovedByRound = 0;
        for(int b = 0; b != nrBeliefs; b++)
        {
            if(!pending[b])
                continue;
            if(!(roundValues[b] >= oldValues[b]))
                continue;

            pending[b] = false;
            nrPending--;
            nrImprovedByRound++;
        }

        if(GetVerbose())
        {
            cout << "Added vectors for " << beliefIdx << " (V "
                 << roundValues[beliefIdx] << " improved "
                 << nrImprovedByRound << ")" << endl;
        }
    }

    BackupStageLeadOut(Gao);

    return Q1;
}