/**
 * Flattens a set of Q functions into a single value function.
 *
 * Every alpha vector of every element of \a Q is copied into the result,
 * in iteration order (outer: elements of Q, inner: vectors of that element).
 * No pruning or duplicate removal is performed.
 *
 * @param Q the Q functions to flatten.
 * @return a value function containing copies of all vectors in \a Q.
 */
ValueFunctionPOMDPDiscrete
AlphaVectorPlanning::QFunctionsToValueFunction(const QFunctionsDiscrete &Q)
{
    ValueFunctionPOMDPDiscrete flattened;

    QFDcit qIt=Q.begin();
    while(qIt!=Q.end())
    {
        VFPDcit vecIt=qIt->begin();
        while(vecIt!=qIt->end())
        {
            flattened.push_back(*vecIt);
            ++vecIt;
        }
        ++qIt;
    }

    return(flattened);
}
double BeliefValue::GetValue(const BeliefInterface &Belief, const QFunctionsDiscrete &Q) { double x,maxVal=-DBL_MAX; for(QFDcit i=Q.begin();i!=Q.end();++i) { x=GetValue(Belief,*i); if(x>maxVal) maxVal=x; } return(maxVal); }
/**
 * Computes, for every belief in a set, its value under a set of Q functions.
 *
 * For each element of \a Q the per-belief values are obtained through the
 * overload of GetValues() that takes a single value function; the result
 * holds the element-wise maximum over all elements of \a Q.
 *
 * @param Beliefs the beliefs to evaluate.
 * @param Q       the Q functions to maximize over.
 * @return a vector of size Beliefs.size(); entries stay at -DBL_MAX when
 *         \a Q is empty.
 */
vector<double> BeliefValue::GetValues(const BeliefSet &Beliefs,
                                      const QFunctionsDiscrete &Q)
{
    vector<double> current(Beliefs.size());
    vector<double> best(Beliefs.size(),-DBL_MAX);

    for(QFDcit qIt=Q.begin();qIt!=Q.end();++qIt)
    {
        // values of all beliefs for this particular Q function
        current=GetValues(Beliefs,*qIt);

        for(unsigned int b=0;b!=current.size();++b)
        {
            if(current[b]>best[b])
                best[b]=current[b];
        }
    }

    return(best);
}
void QAlphaVector::Initialize() { size_t nrS=GetPU()->GetNrStates(); size_t nrA=GetPU()->GetNrJointActions(); size_t h=GetPU()->GetHorizon(); #if 0 // this was code to convert alpha vectors coming from an infinite-horizon method ValueFunctionPOMDPDiscrete V=AlphaVectorPlanning::ImportValueFunction(_m_filename); cout << SoftPrint(V) << endl; QFunctionsDiscrete Q = AlphaVectorPlanning::ValueFunctionToQ(V,nrA,nrS); _m_Q.resize(h); for(Index t=0;t!=h;++t) { _m_Q.at(t).resize(nrA); for(Index a=0;a!=nrA;++a) for(Index i=0;i!=Q.at(a).size();++i) { AlphaVector alpha(nrS); alpha.SetAction(a); for(Index s=0;s!=nrS;++s) { double value=Q.at(a).at(i).GetValue(s+(nrS*t)); alpha.SetValue(value,s); } _m_Q.at(t).at(a).push_back(alpha); cout << t << " " << a << " " << alpha.SoftPrint() << endl; } } #else for(Index t=0;t!=h;++t) { stringstream ss; ss << _m_filename << "_t" << t; ValueFunctionPOMDPDiscrete V=AlphaVectorPlanning::ImportValueFunction(ss.str()); _m_Q.push_back(AlphaVectorPlanning::ValueFunctionToQ(V,nrA,nrS)); } #endif }
/**
 * Splits a value function into one value function per action.
 *
 * For each action index a in [0,nrA) the vectors of \a V whose action
 * equals a are collected, preserving their order in \a V. An action for
 * which \a V holds no vector gets a single placeholder vector of \a nrS
 * entries, constructed as AlphaVector(nrS,-DBL_MAX) and tagged with that
 * action, so that the action is never preferred.
 *
 * @param V   the value function to split.
 * @param nrA the number of actions; the result has exactly nrA entries.
 * @param nrS the number of states, used to size placeholder vectors.
 * @return the per-action value functions, indexed by action.
 */
QFunctionsDiscrete
AlphaVectorPlanning::ValueFunctionToQ(const ValueFunctionPOMDPDiscrete &V,
                                      size_t nrA,
                                      size_t nrS)
{
    QFunctionsDiscrete perAction;

    for(Index a=0;a!=nrA;a++)
    {
        // gather all vectors that belong to action a
        ValueFunctionPOMDPDiscrete vectorsOfA;
        for(VFPDcit vecIt=V.begin();vecIt!=V.end();++vecIt)
        {
            if(vecIt->GetAction()==a)
                vectorsOfA.push_back(*vecIt);
        }

        if(vectorsOfA.size()!=0)
        {
#if DEBUG_AlphaVectorPlanning_ValueFunctionToQ
            cout << "AlphaVectorPlanning::GetQFunctionsFromV: action " << a
                 << " has " << vectorsOfA.size() << " vector(s) " << endl;
#endif
        }
        else
        {
            // if the action has no vector, it's dominated everywhere, so
            // must never be chosen
            AlphaVector dominatedVector(nrS,-DBL_MAX);
            dominatedVector.SetAction(a);
            vectorsOfA.push_back(dominatedVector);
#if DEBUG_AlphaVectorPlanning_ValueFunctionToQ
            cout << "AlphaVectorPlanning::GetQFunctionsFromV: action " << a
                 << " is dominated" << endl;
#endif
        }

        perAction.push_back(vectorsOfA);
    }

    return(perAction);
}
/**
 * Performs one backup stage in which every belief in \a S is backed up
 * for every joint action.
 *
 * For each belief and each joint action a, BeliefBackup() produces an
 * alpha vector that is appended to the a-th entry of the result unless
 * VectorIsInValueFunction() reports it is already there.
 *
 * @param S the belief set to back up.
 * @param Q the Q functions of the previous stage.
 * @return the new Q functions; the container is created with Q.size()
 *         entries and is indexed by joint action.
 */
QFunctionsDiscrete PerseusBGPlanner::BackupStageAll(const BeliefSet &S,
                                                    const QFunctionsDiscrete &Q) const
{
    // values under the old Q functions; only the size is used below
    vector<double> oldValues=BeliefValue::GetValues(S,Q);
    vector<double> newValues;
    int nrBeliefs=oldValues.size();
    int nrStates=GetPU()->GetNrStates();

    QFunctionsDiscrete backedUpQ(Q.size());
    AlphaVector backedUp(nrStates);

    ValueFunctionPOMDPDiscrete oldV=QFunctionsToValueFunction(Q);
    GaoVectorSet Gao=BackupStageLeadIn(oldV);

    // the vectors generated for the belief currently being processed
    ValueFunctionPOMDPDiscrete vectorsForBelief;

    for(int b=0;b<nrBeliefs;++b)
    {
        vectorsForBelief.clear();

        for(unsigned int a=0;a!=GetPU()->GetNrJointActions();++a)
        {
            // backup the belief
            backedUp=BeliefBackup(*S[b],a,Gao,oldV,_m_backupType);

            // add the vector to the new Q function, skipping duplicates
            const bool alreadyPresent=
                VectorIsInValueFunction(backedUp,backedUpQ[a]);
            if(!alreadyPresent)
                backedUpQ[a].push_back(backedUp);

            vectorsForBelief.push_back(backedUp);
        }

        if(GetVerbose())
        {
            newValues=BeliefValue::GetValues(S,vectorsForBelief);
            cout << "Added vectors for " << b << " (V " << newValues[b]
                 << ")" << endl;
        }
    }

    BackupStageLeadOut(Gao);

    return(backedUpQ);
}
void MonahanPOMDPPlanner::MonahanCrossSum(const GaoVectorSet &G, QFunctionsDiscrete &Q, Index a, bool doIncPrune, size_t maxNrAlphas) const { Index nrS=GetPU()->GetNrStates(), nrO=GetPU()->GetNrJointObservations(); AlphaVector alpha(nrS); #if DEBUG_AlphaVectorPlanning_CrossSum cout << "AlphaVectorPlanning::MonahanCrossSum for action " << a << endl; #endif // initialize with the number of vectors already computed for other time steps size_t nrVectorsComputed=GetNrVectors(); // Do the cross-sums, creates G_a of (3.25) VectorSet Ga=*G[a][0]; for(Index o=1; o!=nrO; o++) { if(doIncPrune) { VectorSet Ga2=CrossSum(Ga,*G[a][o]); #if 0 VectorSet Ga1=Prune(Ga2,maxNrAlphas); Ga=Ga1; #else Ga=Prune(Ga2); #endif } else { VectorSet Ga1=CrossSum(Ga,*G[a][o]); Ga=Ga1; } #if DEBUG_AlphaVectorPlanning_CrossSum if(maxNrAlphas) cout << "AlphaVectorPlanning::MonahanCrossSum nrAlphas " << Ga.size1() << " (max " << maxNrAlphas << ")" << endl; #endif CheckMaxNrVectors(maxNrAlphas,nrVectorsComputed+Ga.size1()); } // Add the resulting vectors to V (HV_n of (3.25)) for(Index k=0; k!=Ga.size1(); ++k) { alpha.SetAction(a); for(Index s=0; s!=nrS; s++) alpha.SetValue(Ga(k,s),s); Q[a].push_back(alpha); nrVectorsComputed++; } if(!doIncPrune) { Q[a]=Prune(Q[a]); CheckMaxNrVectors(maxNrAlphas,nrVectorsComputed+Q[a].size()); // THIS IS WRONG throw(E("MonahanPOMDPPlanner fix error")); if(maxNrAlphas && Q.size()>maxNrAlphas) { stringstream ss; ss << "AlphaVectorPlanning::MonahanCrossSum() too many alpha vectors " << Q.size() << ">" << maxNrAlphas; throw(E(ss.str())); } } }
/**
 * Performs one randomized backup stage that keeps going until no belief
 * in \a S is still marked as needing improvement.
 *
 * In every round a belief index is sampled separately for each joint
 * action (from the beliefs still marked as not improved) and that belief
 * is backed up for the action. When the backed-up vector is worse at the
 * sampled belief than the old value VB[k], the maximizing vector of the
 * old Q[a] is used instead. The chosen vector is appended to the a-th
 * entry of the result unless it is already present. After each round
 * every belief whose value under the round's vectors is at least its old
 * value is marked as improved.
 *
 * @param S the belief set to back up.
 * @param Q the Q functions of the previous stage.
 * @return the new Q functions; the container is created with Q.size()
 *         entries and is indexed by joint action.
 */
QFunctionsDiscrete
PerseusBGPlanner::BackupStageSamplingAlt(const BeliefSet &S,
                                         const QFunctionsDiscrete &Q) const
{
    vector<double> VB=BeliefValue::GetValues(S,Q);
    vector<double> VBalpha;
    int nrB=VB.size();
    int nrNotImproved=nrB;
    int nrS=GetPU()->GetNrStates();
    int k=-1;
    vector<bool> stillNeedToBeImproved(nrB,true);

    QFunctionsDiscrete Q1(Q.size());
    AlphaVector alpha(nrS);

    ValueFunctionPOMDPDiscrete V=QFunctionsToValueFunction(Q);
    GaoVectorSet Gao=BackupStageLeadIn(V);

    // the vectors selected in the current round, one per joint action
    ValueFunctionPOMDPDiscrete Qalphas;

    while(nrNotImproved!=0)
    {
        Qalphas.clear();

        for(unsigned int a=0;a!=GetPU()->GetNrJointActions();++a)
        {
            // sample a belief index from the number of not improved beliefs
            k=SampleNotImprovedBeliefIndex(stillNeedToBeImproved,
                                           nrNotImproved);

            // backup the belief
            alpha=BeliefBackup(*S[k],a,Gao,V,_m_backupType);

            // check whether alpha improves the value of S[k]
            double x=S[k]->InnerProduct(alpha.GetValues());

            if(!(x<VB[k]))
            {
                if(GetVerbose())
                    cout << "Added vector for action " << a << ", belief "
                         << k << " (Q " << x << " >= " << VB[k] << ")"
                         << endl;
            }
            else
            {
                // no improvement: get copy from old value function
#if DEBUG_PerseusBGPlanner
                cout << "Getting n-1 vector for action " << a
                     << ", belief " << k << " (" << x << " < " << VB[k]
                     << ")" << endl;
#endif
                alpha=BeliefValue::GetMaximizingVector(S,k,Q[a]);
            }

            // add alpha to Q1
            if(!VectorIsInValueFunction(alpha,Q1[a]))
                Q1[a].push_back(alpha);

            Qalphas.push_back(alpha);
        }

        // update which beliefs have been improved
        VBalpha=BeliefValue::GetValues(S,Qalphas);
        int nrImprovedByAlpha=0;
        for(int b=0;b!=nrB;b++)
        {
            if(!stillNeedToBeImproved[b])
                continue;
            if(VBalpha[b]>=VB[b])
            {
                stillNeedToBeImproved[b]=false;
                nrNotImproved--;
                nrImprovedByAlpha++;
            }
        }

        if(GetVerbose())
            cout << "Added vectors for " << k << " (V " << VBalpha[k]
                 << " improved " << nrImprovedByAlpha << ")" << endl;
    }

    BackupStageLeadOut(Gao);

    return(Q1);
}