void VIPlanner::PrintPolicy(FILE* fPolicy) { bool bPrintStatOnly = true; vector<CMDPSTATE*> WorkList; CMDP PolicyforEvaluation; viPlanner.iteration++; WorkList.push_back(viPlanner.StartState); ((VIState*)viPlanner.StartState->PlannerSpecificData)->iteration = viPlanner.iteration; double PolVal = 0.0; double Conf = 0; bool bCycles = false; SBPL_PRINTF("Printing policy...\n"); while((int)WorkList.size() > 0) { //pop the last state CMDPSTATE* state = WorkList.at(WorkList.size()-1); WorkList.pop_back(); VIState* statedata = (VIState*)state->PlannerSpecificData; CMDPSTATE* polstate = PolicyforEvaluation.AddState(state->StateID); //print state ID if(!bPrintStatOnly) { SBPL_FPRINTF(fPolicy, "%d\n", state->StateID); environment_->PrintState(state->StateID, false, fPolicy); int h = environment_->GetGoalHeuristic(state->StateID); SBPL_FPRINTF(fPolicy, "h=%d\n", h); if(h > statedata->v) { SBPL_FPRINTF(fPolicy, "WARNING h overestimates exp.cost\n"); } } if(state->StateID == viPlanner.GoalState->StateID) { //goal state if(!bPrintStatOnly) SBPL_FPRINTF(fPolicy, "0\n"); Conf += ((VIState*)state->PlannerSpecificData)->Pc; } else if(statedata->bestnextaction == NULL) { //unexplored if(!bPrintStatOnly) { //no outcome explored - stay in the same place SBPL_FPRINTF(fPolicy, "%d %d %d\n", 1, 0, state->StateID); } } else { //get best action CMDPACTION* action = statedata->bestnextaction; //add action to evaluation MDP CMDPACTION* polaction = polstate->AddAction(action->ActionID); if(!bPrintStatOnly) SBPL_FPRINTF(fPolicy, "%d ", (unsigned int)action->SuccsID.size()); //print successors and insert them into the list for(int i = 0; i < (int)action->SuccsID.size(); i++) { if(!bPrintStatOnly) SBPL_FPRINTF(fPolicy, "%d %d ", action->Costs[i], action->SuccsID[i]); polaction->AddOutcome(action->SuccsID[i], action->Costs[i], action->SuccsProb[i]); CMDPSTATE* succstate = GetState(action->SuccsID[i]); if((int)((VIState*)succstate->PlannerSpecificData)->iteration != viPlanner.iteration) { ((VIState*)succstate->PlannerSpecificData)->iteration = viPlanner.iteration; WorkList.push_back(succstate); ((VIState*)succstate->PlannerSpecificData)->Pc = action->SuccsProb[i]*((VIState*)state->PlannerSpecificData)->Pc; PolVal += ((VIState*)succstate->PlannerSpecificData)->Pc*action->Costs[i]; } } if(!bPrintStatOnly) SBPL_FPRINTF(fPolicy, "\n"); } }//while worklist not empty SBPL_PRINTF("done\n"); //now evaluate the policy double PolicyValue = -1; bool bFullPolicy = false; double Pcgoal = -1; int nMerges = 0; EvaluatePolicy(&PolicyforEvaluation, viPlanner.StartState->StateID, viPlanner.GoalState->StateID, &PolicyValue, &bFullPolicy, &Pcgoal, &nMerges, &bCycles); SBPL_PRINTF("Policy value = %f FullPolicy=%d Merges=%d Cycles=%d\n", PolicyValue, bFullPolicy, nMerges, bCycles); if(!bFullPolicy) SBPL_PRINTF("WARN: POLICY IS ONLY PARTIAL\n"); if(fabs(PolicyValue-((VIState*)(viPlanner.StartState->PlannerSpecificData))->v) > MDP_ERRDELTA) SBPL_PRINTF("WARN: POLICY VALUE IS NOT CORRECT\n"); if(!bPrintStatOnly) SBPL_FPRINTF(fPolicy, "backups=%d runtime=%f vstart=%f policyvalue=%f fullpolicy=%d Pc(goal)=%f nMerges=%d bCyc=%d\n", g_backups, (double)g_runtime/CLOCKS_PER_SEC, ((VIState*)(viPlanner.StartState->PlannerSpecificData))->v, PolicyValue, bFullPolicy, Pcgoal, nMerges, bCycles); else SBPL_FPRINTF(fPolicy, "%d %f %f %f %d %f %d %d\n", g_backups, (double)g_runtime/CLOCKS_PER_SEC, ((VIState*)(viPlanner.StartState->PlannerSpecificData))->v, PolicyValue, bFullPolicy, Pcgoal, nMerges, bCycles); }
void EnvironmentNAV2D::SetAllActionsandAllOutcomes(CMDPSTATE* state) { int cost; #if DEBUG if(state->StateID >= (int)EnvNAV2D.StateID2CoordTable.size()) { SBPL_ERROR("ERROR in Env... function: stateID illegal\n"); throw new SBPL_Exception(); } if((int)state->Actions.size() != 0) { SBPL_ERROR("ERROR in Env_setAllActionsandAllOutcomes: actions already exist for the state\n"); throw new SBPL_Exception(); } #endif //goal state should be absorbing if(state->StateID == EnvNAV2D.goalstateid) return; //get X, Y for the state EnvNAV2DHashEntry_t* HashEntry = EnvNAV2D.StateID2CoordTable[state->StateID]; //iterate through actions bool bTestBounds = false; if(HashEntry->X <= 1 || HashEntry->X >= EnvNAV2DCfg.EnvWidth_c-2 || HashEntry->Y <= 1 || HashEntry->Y >= EnvNAV2DCfg.EnvHeight_c-2) bTestBounds = true; for (int aind = 0; aind < EnvNAV2DCfg.numofdirs; aind++) { int newX = HashEntry->X + EnvNAV2DCfg.dx_[aind]; int newY = HashEntry->Y + EnvNAV2DCfg.dy_[aind]; //skip the invalid cells if(bTestBounds){ if(!IsValidCell(newX, newY)) continue; } //compute cost multiplier int costmult = EnvNAV2DCfg.Grid2D[newX][newY]; //for diagonal move, take max over adjacent cells if(newX != HashEntry->X && newY != HashEntry->Y && aind <= 7) { //check two more cells through which the action goes costmult = __max(costmult, EnvNAV2DCfg.Grid2D[HashEntry->X][newY]); costmult = __max(costmult, EnvNAV2DCfg.Grid2D[newX][HashEntry->Y]); } else if(aind > 7) { //check two more cells through which the action goes costmult = __max(costmult, EnvNAV2DCfg.Grid2D[HashEntry->X + EnvNAV2DCfg.dxintersects_[aind][0]][HashEntry->Y + EnvNAV2DCfg.dyintersects_[aind][0]]); costmult = __max(costmult, EnvNAV2DCfg.Grid2D[HashEntry->X + EnvNAV2DCfg.dxintersects_[aind][1]][HashEntry->Y + EnvNAV2DCfg.dyintersects_[aind][1]]); } //check that it is valid if(costmult >= EnvNAV2DCfg.obsthresh) continue; //otherwise compute the actual cost cost = (costmult+1)*EnvNAV2DCfg.dxy_distance_mm_[aind]; //add the action CMDPACTION* action = state->AddAction(aind); #if TIME_DEBUG clock_t currenttime = clock(); #endif EnvNAV2DHashEntry_t* OutHashEntry; if((OutHashEntry = GetHashEntry(newX, newY)) == NULL) { //have to create a new entry OutHashEntry = CreateNewHashEntry(newX, newY); } action->AddOutcome(OutHashEntry->stateID, cost, 1.0); #if TIME_DEBUG time3_addallout += clock()-currenttime; #endif } }
//returns 1 if the solution is found, 0 if the solution does not exist and 2 if it ran out of time int RSTARPlanner::ImprovePath(double MaxNumofSecs) { int expands; RSTARState *rstarstate, *searchgoalstate, *searchstartstate; CKey key, minkey; CKey goalkey; vector<int> SuccIDV; vector<int> CLowV; int highlevelexpands = 0; expands = 0; if(pSearchStateSpace->searchgoalstate == NULL) { printf("ERROR searching: no goal state is set\n"); exit(1); } //goal state searchgoalstate = (RSTARState*)(pSearchStateSpace->searchgoalstate->PlannerSpecificData); if(searchgoalstate->callnumberaccessed != pSearchStateSpace->callnumber) ReInitializeSearchStateInfo(searchgoalstate); //get the start state searchstartstate = (RSTARState*)(pSearchStateSpace->searchstartstate->PlannerSpecificData); //set goal key if(searchgoalstate != searchstartstate){ goalkey.key[0] = 1; goalkey.key[1] = INFINITECOST; } else{ goalkey = ComputeKey(searchstartstate); } //expand states until done minkey = pSearchStateSpace->OPEN->getminkeyheap(); while(!pSearchStateSpace->OPEN->emptyheap() && (clock()-TimeStarted) < MaxNumofSecs*(double)CLOCKS_PER_SEC) { //recompute minkey minkey = pSearchStateSpace->OPEN->getminkeyheap(); //recompute goalkey if necessary goalkey = ComputeKey(searchgoalstate); if(goalkey < minkey) break; //termination condition //pop the min element rstarstate = (RSTARState*)pSearchStateSpace->OPEN->deleteminheap(); fprintf(fDeb, "ComputePath: selected state %d g=%d\n", rstarstate->MDPstate->StateID, rstarstate->g); environment_->PrintState(rstarstate->MDPstate->StateID, true, fDeb); if (rstarstate->MDPstate != pSearchStateSpace->searchstartstate && ((RSTARACTIONDATA*)rstarstate->bestpredaction->PlannerSpecificData)->pathIDs.size() == 0) { fprintf(fDeb, "re-compute path\n"); int maxe = INFINITECOST; int maxc = INFINITECOST; //predecessor RSTARState* rstarpredstate = (RSTARState*)GetState(rstarstate->bestpredaction->SourceStateID)->PlannerSpecificData; CMDPACTION* computedaction = rstarstate->bestpredaction; RSTARACTIONDATA* computedactiondata = (RSTARACTIONDATA*)computedaction->PlannerSpecificData; if(computedactiondata->exp < RSTAR_EXPTHRESH) { maxe = RSTAR_EXPTHRESH; } else { printf("Trying to compute hard-to-find path\n"); fprintf(fDeb, "Trying to compute hard-to-find path\n"); /* TODO CKey nextkey = rstarPlanner.OPEN->getminkeyheap(); if(bforwardsearch) h = environment_->GetFromToHeuristic(rstarstate->MDPstate->StateID, pSearchStateSpace->GoalState->StateID); else h = environment_->GetFromToHeuristic(pSearchStateSpace->GoalState->StateID, rstarstate->MDPstate->StateID); maxc = nextkey[1] - (rstarpredstate->g + rstarPlanner.epsilon*h) + RSTAR_COSTDELTA; */ } fprintf(fDeb, "recomputing path from bp %d to state %d with maxc=%d maxe=%d\n", rstarpredstate->MDPstate->StateID, rstarstate->MDPstate->StateID, maxc, maxe); fprintf(fDeb, "bp state:\n"); environment_->PrintState(rstarpredstate->MDPstate->StateID, true, fDeb); //re-compute the path int NewGoalStateID = rstarstate->MDPstate->StateID; ComputeLocalPath(rstarpredstate->MDPstate->StateID, rstarstate->MDPstate->StateID, maxc, maxe, &computedaction->Costs[0], &computedactiondata->clow, &computedactiondata->exp, &computedactiondata->pathIDs, &NewGoalStateID); fprintf(fDeb, "return values: pathcost=%d clow=%d exp=%d\n", computedaction->Costs[0], computedactiondata->clow, computedactiondata->exp); bool bSwitch = false; if(NewGoalStateID != rstarstate->MDPstate->StateID) { bSwitch = true; fprintf(fDeb, "targetstate was switched from %d to %d\n", rstarstate->MDPstate->StateID, NewGoalStateID); fprintf(stdout, "targetstate was switched from %d to %d\n", rstarstate->MDPstate->StateID, NewGoalStateID); environment_->PrintState(NewGoalStateID, true, fDeb); RSTARState* rstarNewTargetState = (RSTARState*)GetState(NewGoalStateID)->PlannerSpecificData; //re-initialize the state if necessary if(rstarNewTargetState->callnumberaccessed != pSearchStateSpace->callnumber) ReInitializeSearchStateInfo(rstarNewTargetState); fprintf(fDeb, "predstate.g=%d actual actioncost=%d clow=%d newtartetstate.g=%d\n", rstarpredstate->g, computedaction->Costs[0], ((RSTARACTIONDATA*)computedaction->PlannerSpecificData)->clow, rstarNewTargetState->g); //add the successor to our graph CMDPACTION* action = rstarpredstate->MDPstate->AddAction(rstarpredstate->MDPstate->Actions.size()); action->AddOutcome(rstarNewTargetState->MDPstate->StateID, computedaction->Costs[0], 1.0); action->PlannerSpecificData = new RSTARACTIONDATA; MaxMemoryCounter += sizeof(RSTARACTIONDATA); ((RSTARACTIONDATA*)action->PlannerSpecificData)->clow = computedactiondata->clow; ((RSTARACTIONDATA*)action->PlannerSpecificData)->exp = computedactiondata->exp; ((RSTARACTIONDATA*)action->PlannerSpecificData)->pathIDs = computedactiondata->pathIDs; //add the corresponding predaction rstarNewTargetState->predactionV.push_back(action); //the action was not found to the old state computedaction->Costs[0] = INFINITECOST; if(bforwardsearch) computedactiondata->clow = environment_->GetFromToHeuristic(rstarpredstate->MDPstate->StateID, rstarstate->MDPstate->StateID); else computedactiondata->clow = environment_->GetFromToHeuristic(rstarstate->MDPstate->StateID, rstarpredstate->MDPstate->StateID); computedactiondata->pathIDs.clear(); rstarstate = rstarNewTargetState; computedaction = action; computedactiondata = (RSTARACTIONDATA*)action->PlannerSpecificData; } //clean up local search memory DestroyLocalSearchMemory(); RSTARState* stateu = NULL; CMDPACTION* utosaction = NULL; int hfromstarttostate; if(bforwardsearch) hfromstarttostate = environment_->GetFromToHeuristic(searchstartstate->MDPstate->StateID, rstarstate->MDPstate->StateID); else hfromstarttostate = environment_->GetFromToHeuristic(rstarstate->MDPstate->StateID,searchstartstate->MDPstate->StateID); if (computedactiondata->pathIDs.size() == 0 || rstarpredstate->g + computedactiondata->clow > pSearchStateSpace->eps*hfromstarttostate) { fprintf(fDeb, "selecting best pred\n"); //fprintf(stdout, "selecting best pred\n"); //select other best predecessor unsigned int minQ = INFINITECOST; for(int i = 0; i < (int)rstarstate->predactionV.size(); i++) { CMDPACTION* predaction = rstarstate->predactionV.at(i); rstarpredstate = (RSTARState*)GetState(predaction->SourceStateID)->PlannerSpecificData; if(minQ >= rstarpredstate->g + ((RSTARACTIONDATA*)predaction->PlannerSpecificData)->clow) { minQ = rstarpredstate->g + ((RSTARACTIONDATA*)predaction->PlannerSpecificData)->clow; stateu = rstarpredstate; utosaction = predaction; } } //set the predecessor SetBestPredecessor(rstarstate, stateu, utosaction); } else if(rstarpredstate->g + computedactiondata->clow < rstarstate->g || bSwitch == false) { fprintf(fDeb, "keeping the same computedaction\n"); //fprintf(stdout, "keeping the same best pred\n"); stateu = rstarpredstate; utosaction = computedaction; //set the predecessor SetBestPredecessor(rstarstate, stateu, utosaction); } else { fprintf(fDeb, "keeping the same bestpredaction even though switch of targetstates happened\n"); } } else { fprintf(fDeb, "high-level normal expansion of state %d\n", rstarstate->MDPstate->StateID); environment_->PrintState(rstarstate->MDPstate->StateID, true, fDeb); highlevelexpands++; //close the state rstarstate->iterationclosed = pSearchStateSpace->searchiteration; //expansion expands++; //generate SUCCS state SuccIDV.clear(); CLowV.clear(); if(bforwardsearch) environment_->GetRandomSuccsatDistance(rstarstate->MDPstate->StateID, &SuccIDV, &CLowV); else environment_->GetRandomPredsatDistance(rstarstate->MDPstate->StateID, &SuccIDV, &CLowV); fprintf(fDeb, "%d succs were generated at random\n", SuccIDV.size()); //iterate over states in SUCCS set int notclosed = 0; for(int i = 0; i < (int)SuccIDV.size(); i++) { RSTARState* rstarSuccState = (RSTARState*)GetState(SuccIDV.at(i))->PlannerSpecificData; fprintf(fDeb, "succ %d:\n", i); environment_->PrintState(rstarSuccState->MDPstate->StateID, true, fDeb); //skip if the state is already closed if(rstarSuccState->iterationclosed == pSearchStateSpace->searchiteration) { fprintf(fDeb, "this succ was already closed -- skipping it\n"); continue; } notclosed++; //re-initialize the state if necessary if(rstarSuccState->callnumberaccessed != pSearchStateSpace->callnumber) ReInitializeSearchStateInfo(rstarSuccState); //if(rstarSuccState->MDPstate->StateID == 3838){ // fprintf(fDeb, "generating state %d with g=%d bp=%d:\n", rstarSuccState->MDPstate->StateID, rstarSuccState->g, // (rstarSuccState->bestpredaction==NULL?-1:rstarSuccState->bestpredaction->SourceStateID)); // Env_PrintState(rstarSuccState->MDPstate->StateID, true, false, fDeb); //} //add the successor to our graph CMDPACTION* action = rstarstate->MDPstate->AddAction(i); action->AddOutcome(rstarSuccState->MDPstate->StateID, INFINITECOST, 1.0); action->PlannerSpecificData = new RSTARACTIONDATA; MaxMemoryCounter += sizeof(RSTARACTIONDATA); ((RSTARACTIONDATA*)action->PlannerSpecificData)->clow = CLowV[i]; ((RSTARACTIONDATA*)action->PlannerSpecificData)->exp = 0; ((RSTARACTIONDATA*)action->PlannerSpecificData)->pathIDs.clear(); //add the corresponding predaction rstarSuccState->predactionV.push_back(action); //see if we can improve g-value of successor if(rstarSuccState->bestpredaction == NULL || rstarstate->g + CLowV[i] < rstarSuccState->g) { SetBestPredecessor(rstarSuccState, rstarstate, action); fprintf(fDeb, "bestpred was set for the succ (clow=%d)\n", CLowV[i]); } else { fprintf(fDeb, "bestpred was NOT modified - old one is better\n"); } } //printf("%d successors were not closed\n", notclosed); }//else //recompute minkey minkey = pSearchStateSpace->OPEN->getminkeyheap(); //recompute goalkey if necessary goalkey = ComputeKey(searchgoalstate); if(expands%10 == 0 && expands > 0) { printf("high-level expands so far=%u\n", expands); } } printf("main loop done\n"); fprintf(fDeb, "main loop done\n"); int retv = 1; if(searchgoalstate->g == INFINITECOST && pSearchStateSpace->OPEN->emptyheap()) { printf("solution does not exist: search exited because heap is empty\n"); retv = 0; } else if(!pSearchStateSpace->OPEN->emptyheap() && goalkey > minkey) { printf("search exited because it ran out of time\n"); retv = 2; } else if(searchgoalstate->g == INFINITECOST && !pSearchStateSpace->OPEN->emptyheap()) { printf("solution does not exist: search exited because all candidates for expansion have infinite heuristics\n"); retv = 0; } else { printf("search exited with a solution for eps=%.3f\n", pSearchStateSpace->eps); retv = 1; } fprintf(fDeb, "high-level expanded=%d\n", expands); highlevel_searchexpands += expands; return retv; }