예제 #1
0
//Prints the policy reachable from the start state under the current best
//actions to fPolicy and evaluates it. The traversal copies every visited
//state/action into a separate MDP (PolicyforEvaluation), which is then passed
//to EvaluatePolicy() to compute the expected policy value, whether the policy
//is complete, the probability of reaching the goal, and whether it contains
//cycles. With bPrintStatOnly=true (hard-coded below) only a single summary
//line of statistics is written to fPolicy instead of the full policy.
void VIPlanner::PrintPolicy(FILE* fPolicy)
{
	bool bPrintStatOnly = true;  //toggle: statistics-only vs. full per-state printout
	vector<CMDPSTATE*> WorkList; //LIFO stack of states still to be processed
	CMDP PolicyforEvaluation;    //copy of the policy graph fed to EvaluatePolicy()

	//advance the traversal counter; states stamped with the new value below
	//are considered already visited during this traversal
	viPlanner.iteration++;
	WorkList.push_back(viPlanner.StartState);
	((VIState*)viPlanner.StartState->PlannerSpecificData)->iteration = viPlanner.iteration;
	double PolVal = 0.0;  //accumulates Pc-weighted costs of discovered transitions
	double Conf = 0;      //probability mass that reaches the goal state
	bool bCycles = false; //set by EvaluatePolicy() if the policy has cycles
	SBPL_PRINTF("Printing policy...\n");
	while((int)WorkList.size() > 0)
	{
		//pop the last state
		CMDPSTATE* state = WorkList.at(WorkList.size()-1);
		WorkList.pop_back();
		VIState* statedata = (VIState*)state->PlannerSpecificData;

		//mirror this state in the evaluation MDP
		CMDPSTATE* polstate = PolicyforEvaluation.AddState(state->StateID);

		//print state ID
		if(!bPrintStatOnly)
		{
			SBPL_FPRINTF(fPolicy, "%d\n", state->StateID); 
			environment_->PrintState(state->StateID, false, fPolicy);

			//sanity check: heuristic should not exceed the computed value v
			int h = environment_->GetGoalHeuristic(state->StateID);
			SBPL_FPRINTF(fPolicy, "h=%d\n", h);
			if(h > statedata->v)
			{
				SBPL_FPRINTF(fPolicy, "WARNING h overestimates exp.cost\n");
			}
		}

		if(state->StateID == viPlanner.GoalState->StateID) 
		{
			//goal state - its reach probability counts towards the confidence
			if(!bPrintStatOnly)
				SBPL_FPRINTF(fPolicy, "0\n");
			Conf += ((VIState*)state->PlannerSpecificData)->Pc;
		}
		else if(statedata->bestnextaction == NULL)
		{
			//unexplored
			if(!bPrintStatOnly)
			{
				//no outcome explored - stay in the same place
				SBPL_FPRINTF(fPolicy, "%d %d %d\n", 1, 0, state->StateID);
			}
		}
		else
		{

			//get best action
			CMDPACTION* action = statedata->bestnextaction;

			//add action to evaluation MDP
			CMDPACTION* polaction = polstate->AddAction(action->ActionID);

			if(!bPrintStatOnly)
				SBPL_FPRINTF(fPolicy, "%d ", (unsigned int)action->SuccsID.size());

			//print successors and insert them into the list
			for(int i = 0; i < (int)action->SuccsID.size(); i++)
			{
				if(!bPrintStatOnly)
					SBPL_FPRINTF(fPolicy, "%d %d ", action->Costs[i], action->SuccsID[i]);
				polaction->AddOutcome(action->SuccsID[i], action->Costs[i], action->SuccsProb[i]);

				//queue each successor not yet stamped with the current iteration
				CMDPSTATE* succstate = GetState(action->SuccsID[i]);
				if((int)((VIState*)succstate->PlannerSpecificData)->iteration != viPlanner.iteration)
				{
					((VIState*)succstate->PlannerSpecificData)->iteration = viPlanner.iteration;
					WorkList.push_back(succstate);

					//propagate the probability of reaching the successor and
					//accumulate its expected contribution to the policy value
					((VIState*)succstate->PlannerSpecificData)->Pc = 
						action->SuccsProb[i]*((VIState*)state->PlannerSpecificData)->Pc;
					PolVal += ((VIState*)succstate->PlannerSpecificData)->Pc*action->Costs[i];
				}
			}
			if(!bPrintStatOnly)
				SBPL_FPRINTF(fPolicy, "\n");
		}
	}//while worklist not empty
	SBPL_PRINTF("done\n");

	//now evaluate the policy
	double PolicyValue = -1;
	bool bFullPolicy = false;
	double Pcgoal = -1;  //probability of reaching the goal, filled in by EvaluatePolicy()
	int nMerges = 0;
	EvaluatePolicy(&PolicyforEvaluation, viPlanner.StartState->StateID,
		viPlanner.GoalState->StateID, &PolicyValue, &bFullPolicy, &Pcgoal, &nMerges,
		&bCycles);

	SBPL_PRINTF("Policy value = %f FullPolicy=%d Merges=%d Cycles=%d\n", 
		PolicyValue, bFullPolicy, nMerges, bCycles);
	
	if(!bFullPolicy)
		SBPL_PRINTF("WARN: POLICY IS ONLY PARTIAL\n");
	//the evaluated value should match the start-state value computed by VI
	if(fabs(PolicyValue-((VIState*)(viPlanner.StartState->PlannerSpecificData))->v) > MDP_ERRDELTA)
		SBPL_PRINTF("WARN: POLICY VALUE IS NOT CORRECT\n");


	//write the summary: labeled fields in full mode, bare numbers in stat-only mode
	if(!bPrintStatOnly)
		SBPL_FPRINTF(fPolicy, "backups=%d runtime=%f vstart=%f policyvalue=%f fullpolicy=%d Pc(goal)=%f nMerges=%d bCyc=%d\n", 
			g_backups, (double)g_runtime/CLOCKS_PER_SEC, 
			((VIState*)(viPlanner.StartState->PlannerSpecificData))->v,		
			PolicyValue, bFullPolicy, Pcgoal, nMerges, bCycles);
	else
		SBPL_FPRINTF(fPolicy, "%d %f %f %f %d %f %d %d\n", 
			g_backups, (double)g_runtime/CLOCKS_PER_SEC, 
			((VIState*)(viPlanner.StartState->PlannerSpecificData))->v,		
			PolicyValue, bFullPolicy, Pcgoal, nMerges, bCycles);

}
예제 #2
0
//Populates the given MDP state with one action per motion direction, each
//having a single deterministic outcome (probability 1.0). The goal state is
//absorbing and receives no actions. Invalid and obstructed moves are skipped.
void EnvironmentNAV2D::SetAllActionsandAllOutcomes(CMDPSTATE* state)
{
#if DEBUG
	if(state->StateID >= (int)EnvNAV2D.StateID2CoordTable.size())
	{
		SBPL_ERROR("ERROR in Env... function: stateID illegal\n");
		throw new SBPL_Exception();
	}

	if((int)state->Actions.size() != 0)
	{
		SBPL_ERROR("ERROR in Env_setAllActionsandAllOutcomes: actions already exist for the state\n");
		throw new SBPL_Exception();
	}
#endif

	//the goal state is absorbing: no outgoing actions
	if(state->StateID == EnvNAV2D.goalstateid)
		return;

	//look up the grid coordinates of this state
	EnvNAV2DHashEntry_t* entry = EnvNAV2D.StateID2CoordTable[state->StateID];

	//only cells close to the map border need explicit validity checks
	bool nearBorder = (entry->X <= 1 || entry->X >= EnvNAV2DCfg.EnvWidth_c-2 ||
	                   entry->Y <= 1 || entry->Y >= EnvNAV2DCfg.EnvHeight_c-2);

	//generate one action per motion direction
	for (int aind = 0; aind < EnvNAV2DCfg.numofdirs; aind++)
	{
		int succX = entry->X + EnvNAV2DCfg.dx_[aind];
		int succY = entry->Y + EnvNAV2DCfg.dy_[aind];

		//skip moves that would leave the map (checked only near the border)
		if(nearBorder && !IsValidCell(succX, succY))
			continue;

		//cost multiplier is the worst cell the move passes through
		int cellcost = EnvNAV2DCfg.Grid2D[succX][succY];
		bool movesDiagonally = (succX != entry->X && succY != entry->Y);
		if(movesDiagonally && aind <= 7)
		{
			//diagonal move among the first 8 directions: take the max over
			//the two adjacent cells the motion cuts through
			cellcost = __max(cellcost, EnvNAV2DCfg.Grid2D[entry->X][succY]);
			cellcost = __max(cellcost, EnvNAV2DCfg.Grid2D[succX][entry->Y]);
		}
		else if(aind > 7)
		{
			//longer move: take the max over the two intersected cells given
			//by the dxintersects_/dyintersects_ tables
			cellcost = __max(cellcost, EnvNAV2DCfg.Grid2D[entry->X + EnvNAV2DCfg.dxintersects_[aind][0]][entry->Y + EnvNAV2DCfg.dyintersects_[aind][0]]);
			cellcost = __max(cellcost, EnvNAV2DCfg.Grid2D[entry->X + EnvNAV2DCfg.dxintersects_[aind][1]][entry->Y + EnvNAV2DCfg.dyintersects_[aind][1]]);
		}

		//cells at or above the obstacle threshold are not traversable
		if(cellcost >= EnvNAV2DCfg.obsthresh)
			continue;

		//actual traversal cost scales with the move's metric length
		int cost = (cellcost+1)*EnvNAV2DCfg.dxy_distance_mm_[aind];

		//attach the action for this direction
		CMDPACTION* action = state->AddAction(aind);

#if TIME_DEBUG
		clock_t currenttime = clock();
#endif

		//find or lazily create the successor state's hash entry
		EnvNAV2DHashEntry_t* outEntry = GetHashEntry(succX, succY);
		if(outEntry == NULL)
		{
			outEntry = CreateNewHashEntry(succX, succY);
		}
		action->AddOutcome(outEntry->stateID, cost, 1.0);

#if TIME_DEBUG
		time3_addallout += clock()-currenttime;
#endif

	}
}
예제 #3
0
//High-level R* search loop. Repeatedly selects the state with the minimum key
//from OPEN. If the best incoming action of the selected state has no local
//path computed yet, a low-level search (ComputeLocalPath) is run to compute
//it (possibly switching the target state); otherwise the state is expanded
//normally by generating random successors at a distance. The loop terminates
//when the goal key is no worse than the minimum key in OPEN, when OPEN is
//empty, or when more than MaxNumofSecs seconds (measured with clock() since
//TimeStarted) have elapsed.
//returns 1 if the solution is found, 0 if the solution does not exist and 2 if it ran out of time
int RSTARPlanner::ImprovePath(double MaxNumofSecs)
{
	int expands;
	RSTARState *rstarstate, *searchgoalstate, *searchstartstate;
	CKey key, minkey;
	CKey goalkey;

    vector<int> SuccIDV;
    vector<int> CLowV;
	int highlevelexpands = 0;

	expands = 0;

	if(pSearchStateSpace->searchgoalstate == NULL)
	{
		printf("ERROR searching: no goal state is set\n");
		exit(1);
	}

	//goal state (re-initialize its search info if stale from a previous call)
	searchgoalstate = (RSTARState*)(pSearchStateSpace->searchgoalstate->PlannerSpecificData);
	if(searchgoalstate->callnumberaccessed != pSearchStateSpace->callnumber)
		ReInitializeSearchStateInfo(searchgoalstate);

	//get the start state
	searchstartstate = (RSTARState*)(pSearchStateSpace->searchstartstate->PlannerSpecificData);

	//set goal key (worst possible unless start == goal)
    if(searchgoalstate != searchstartstate){
        goalkey.key[0] = 1;
        goalkey.key[1] = INFINITECOST;
    }
    else{
        goalkey = ComputeKey(searchstartstate);
    }


	//expand states until done
	minkey = pSearchStateSpace->OPEN->getminkeyheap();
	while(!pSearchStateSpace->OPEN->emptyheap() &&
		(clock()-TimeStarted) < MaxNumofSecs*(double)CLOCKS_PER_SEC) 
    {
      
      //recompute minkey
      minkey = pSearchStateSpace->OPEN->getminkeyheap();
      
      //recompute goalkey if necessary
      goalkey = ComputeKey(searchgoalstate);
      
      if(goalkey < minkey)
	break; //termination condition


        //pop the min element
        rstarstate = (RSTARState*)pSearchStateSpace->OPEN->deleteminheap();

        fprintf(fDeb, "ComputePath:  selected state %d g=%d\n", rstarstate->MDPstate->StateID, rstarstate->g);
        environment_->PrintState(rstarstate->MDPstate->StateID, true, fDeb);
        
        //the best incoming action has no local path yet -> run the low-level search
        if (rstarstate->MDPstate != pSearchStateSpace->searchstartstate &&
            ((RSTARACTIONDATA*)rstarstate->bestpredaction->PlannerSpecificData)->pathIDs.size() == 0) 
        {
            fprintf(fDeb, "re-compute path\n");
  
            //limits on low-level expansions (maxe) and path cost (maxc)
            int maxe = INFINITECOST;
            int maxc = INFINITECOST;

            //predecessor
            RSTARState* rstarpredstate = (RSTARState*)GetState(rstarstate->bestpredaction->SourceStateID)->PlannerSpecificData;
            CMDPACTION* computedaction = rstarstate->bestpredaction;
            RSTARACTIONDATA* computedactiondata = (RSTARACTIONDATA*)computedaction->PlannerSpecificData;

            //easy paths get a bounded expansion budget; hard ones are unbounded
            if(computedactiondata->exp < RSTAR_EXPTHRESH)
            {
                maxe = RSTAR_EXPTHRESH;
            }
            else
            {
                printf("Trying to compute hard-to-find path\n");
                fprintf(fDeb, "Trying to compute hard-to-find path\n");
                /* TODO
                CKey nextkey = rstarPlanner.OPEN->getminkeyheap();
                
				if(bforwardsearch)
					h = environment_->GetFromToHeuristic(rstarstate->MDPstate->StateID, pSearchStateSpace->GoalState->StateID);
				else
					h = environment_->GetFromToHeuristic(pSearchStateSpace->GoalState->StateID, rstarstate->MDPstate->StateID);

                maxc = nextkey[1] -
                     (rstarpredstate->g + rstarPlanner.epsilon*h) + RSTAR_COSTDELTA;
                */
            }


            fprintf(fDeb, "recomputing path from bp %d to state %d with maxc=%d maxe=%d\n", 
                rstarpredstate->MDPstate->StateID, rstarstate->MDPstate->StateID, maxc, maxe);
            fprintf(fDeb, "bp state:\n");
            environment_->PrintState(rstarpredstate->MDPstate->StateID, true, fDeb);

            //re-compute the path; the low-level search may terminate at a
            //different state, returned via NewGoalStateID
            int NewGoalStateID = rstarstate->MDPstate->StateID;
            ComputeLocalPath(rstarpredstate->MDPstate->StateID, rstarstate->MDPstate->StateID, maxc, maxe, 
                &computedaction->Costs[0], &computedactiondata->clow, &computedactiondata->exp, &computedactiondata->pathIDs, &NewGoalStateID);

            fprintf(fDeb, "return values: pathcost=%d clow=%d exp=%d\n", 
                computedaction->Costs[0], computedactiondata->clow, computedactiondata->exp);
            bool bSwitch = false;
            if(NewGoalStateID != rstarstate->MDPstate->StateID)
            {
                //the low-level search reached a different target state:
                //graft a new action to it and invalidate the old one
                bSwitch = true;
                fprintf(fDeb, "targetstate was switched from %d to %d\n", rstarstate->MDPstate->StateID, NewGoalStateID);
                fprintf(stdout, "targetstate was switched from %d to %d\n", rstarstate->MDPstate->StateID, NewGoalStateID);
                environment_->PrintState(NewGoalStateID, true, fDeb);

                RSTARState* rstarNewTargetState = (RSTARState*)GetState(NewGoalStateID)->PlannerSpecificData;
				
				//re-initialize the state if necessary
				if(rstarNewTargetState->callnumberaccessed != pSearchStateSpace->callnumber)
					ReInitializeSearchStateInfo(rstarNewTargetState);

                fprintf(fDeb, "predstate.g=%d actual actioncost=%d clow=%d newtartetstate.g=%d\n", rstarpredstate->g, computedaction->Costs[0], 
                        ((RSTARACTIONDATA*)computedaction->PlannerSpecificData)->clow, rstarNewTargetState->g);

                //add the successor to our graph
                CMDPACTION* action = rstarpredstate->MDPstate->AddAction(rstarpredstate->MDPstate->Actions.size());
                action->AddOutcome(rstarNewTargetState->MDPstate->StateID, computedaction->Costs[0], 1.0);
                action->PlannerSpecificData = new RSTARACTIONDATA;
				MaxMemoryCounter += sizeof(RSTARACTIONDATA);
                ((RSTARACTIONDATA*)action->PlannerSpecificData)->clow = computedactiondata->clow;
                ((RSTARACTIONDATA*)action->PlannerSpecificData)->exp = computedactiondata->exp;
                ((RSTARACTIONDATA*)action->PlannerSpecificData)->pathIDs = computedactiondata->pathIDs;
                
                //add the corresponding predaction
                rstarNewTargetState->predactionV.push_back(action);
    
                //the action was not found to the old state - reset it to the
                //heuristic lower bound with infinite cost and no path
                computedaction->Costs[0] = INFINITECOST;
				if(bforwardsearch)
	                computedactiondata->clow = environment_->GetFromToHeuristic(rstarpredstate->MDPstate->StateID, rstarstate->MDPstate->StateID);
				else
	                computedactiondata->clow = environment_->GetFromToHeuristic(rstarstate->MDPstate->StateID, rstarpredstate->MDPstate->StateID);

                computedactiondata->pathIDs.clear();

                //continue processing with the switched target state
                rstarstate = rstarNewTargetState;
                computedaction = action; 
                computedactiondata = (RSTARACTIONDATA*)action->PlannerSpecificData;
            }

            //clean up local search memory
            DestroyLocalSearchMemory();
           
            RSTARState* stateu = NULL;
            CMDPACTION* utosaction = NULL;
			int hfromstarttostate;
			if(bforwardsearch)
				hfromstarttostate = environment_->GetFromToHeuristic(searchstartstate->MDPstate->StateID, rstarstate->MDPstate->StateID);
			else
				hfromstarttostate = environment_->GetFromToHeuristic(rstarstate->MDPstate->StateID,searchstartstate->MDPstate->StateID);
            //no path found or the path is too suboptimal -> pick a new best predecessor
            if (computedactiondata->pathIDs.size() == 0 || 
                rstarpredstate->g + computedactiondata->clow > pSearchStateSpace->eps*hfromstarttostate)
            {
               fprintf(fDeb, "selecting best pred\n");
               //fprintf(stdout, "selecting best pred\n");

                //select other best predecessor: minimize g(pred) + clow(pred->state)
                unsigned int minQ = INFINITECOST;
                for(int i = 0; i < (int)rstarstate->predactionV.size(); i++)
                {
                    CMDPACTION* predaction = rstarstate->predactionV.at(i);
                    rstarpredstate = (RSTARState*)GetState(predaction->SourceStateID)->PlannerSpecificData;    
                    if(minQ >= rstarpredstate->g + ((RSTARACTIONDATA*)predaction->PlannerSpecificData)->clow)
                    {
                        minQ = rstarpredstate->g + ((RSTARACTIONDATA*)predaction->PlannerSpecificData)->clow;
                        stateu = rstarpredstate;
                        utosaction = predaction;
                    }
                }

                //set the predecessor
                SetBestPredecessor(rstarstate, stateu, utosaction);
            }
            else if(rstarpredstate->g + computedactiondata->clow < rstarstate->g || bSwitch == false)
            {
               fprintf(fDeb, "keeping the same computedaction\n");
               //fprintf(stdout, "keeping the same best pred\n");

                stateu = rstarpredstate;
                utosaction = computedaction;

                //set the predecessor
                SetBestPredecessor(rstarstate, stateu, utosaction);

            }
            else
            {
                fprintf(fDeb, "keeping the same bestpredaction even though switch of targetstates happened\n");
            }           
        }
        else
        {
            //normal high-level expansion: sample successors at a distance
            fprintf(fDeb, "high-level normal expansion of state %d\n", rstarstate->MDPstate->StateID);
            environment_->PrintState(rstarstate->MDPstate->StateID, true, fDeb);
            highlevelexpands++;

            //close the state
            rstarstate->iterationclosed = pSearchStateSpace->searchiteration;

            //expansion
            expands++;

            //generate SUCCS set (preds if searching backwards)
            SuccIDV.clear();
            CLowV.clear();
			if(bforwardsearch)
	            environment_->GetRandomSuccsatDistance(rstarstate->MDPstate->StateID, &SuccIDV, &CLowV); 
			else
	            environment_->GetRandomPredsatDistance(rstarstate->MDPstate->StateID, &SuccIDV, &CLowV); 


            //NOTE: size() is size_t; cast to int to match the %d specifier
            fprintf(fDeb, "%d succs were generated at random\n", (int)SuccIDV.size());

            //iterate over states in SUCCS set
            int notclosed = 0;
            for(int i = 0; i < (int)SuccIDV.size(); i++)
            {
                RSTARState* rstarSuccState = (RSTARState*)GetState(SuccIDV.at(i))->PlannerSpecificData;

                fprintf(fDeb, "succ %d:\n", i);
                environment_->PrintState(rstarSuccState->MDPstate->StateID, true, fDeb);
                
                //skip if the state is already closed
                if(rstarSuccState->iterationclosed == pSearchStateSpace->searchiteration)
                {
                    fprintf(fDeb, "this succ was already closed -- skipping it\n");
                    continue;
                }
                notclosed++;
 				
				//re-initialize the state if necessary
				if(rstarSuccState->callnumberaccessed != pSearchStateSpace->callnumber)
					ReInitializeSearchStateInfo(rstarSuccState);
                
                //if(rstarSuccState->MDPstate->StateID == 3838){
                //    fprintf(fDeb, "generating state %d with g=%d bp=%d:\n", rstarSuccState->MDPstate->StateID, rstarSuccState->g, 
                //            (rstarSuccState->bestpredaction==NULL?-1:rstarSuccState->bestpredaction->SourceStateID));
                //    Env_PrintState(rstarSuccState->MDPstate->StateID, true, false, fDeb);
                //}


                //add the successor to our graph; its path starts empty and its
                //cost is infinite until the local path is actually computed
                CMDPACTION* action = rstarstate->MDPstate->AddAction(i);
                action->AddOutcome(rstarSuccState->MDPstate->StateID, INFINITECOST, 1.0);
                action->PlannerSpecificData = new RSTARACTIONDATA;
				MaxMemoryCounter += sizeof(RSTARACTIONDATA);
                ((RSTARACTIONDATA*)action->PlannerSpecificData)->clow = CLowV[i];
                ((RSTARACTIONDATA*)action->PlannerSpecificData)->exp = 0;
                ((RSTARACTIONDATA*)action->PlannerSpecificData)->pathIDs.clear();

                //add the corresponding predaction
                rstarSuccState->predactionV.push_back(action);

                //see if we can improve g-value of successor
                if(rstarSuccState->bestpredaction == NULL || rstarstate->g + CLowV[i] < rstarSuccState->g)
                {
                    SetBestPredecessor(rstarSuccState, rstarstate, action);
                    fprintf(fDeb, "bestpred was set for the succ (clow=%d)\n", CLowV[i]);
                }
                else
                {
                    fprintf(fDeb, "bestpred was NOT modified - old one is better\n");
                }

            }

            //printf("%d successors were not closed\n", notclosed);
        }//else


		//recompute minkey
		minkey = pSearchStateSpace->OPEN->getminkeyheap();

		//recompute goalkey if necessary
		goalkey = ComputeKey(searchgoalstate);

		if(expands%10 == 0 && expands > 0)
		{
			//expands is a signed int, so print it with %d (was %u)
			printf("high-level expands so far=%d\n", expands);
		}

	}
    printf("main loop done\n");
    fprintf(fDeb, "main loop done\n");

	//classify the outcome based on why the loop exited
	int retv = 1;
	if(searchgoalstate->g == INFINITECOST && pSearchStateSpace->OPEN->emptyheap())
	{
		printf("solution does not exist: search exited because heap is empty\n");
		retv = 0;
	}
	else if(!pSearchStateSpace->OPEN->emptyheap() && goalkey > minkey)
	{
		printf("search exited because it ran out of time\n");
		retv = 2;
	}
	else if(searchgoalstate->g == INFINITECOST && !pSearchStateSpace->OPEN->emptyheap())
	{
		printf("solution does not exist: search exited because all candidates for expansion have infinite heuristics\n");
		retv = 0;
	}
	else
	{
		printf("search exited with a solution for eps=%.3f\n", pSearchStateSpace->eps);
		retv = 1;
	}

	fprintf(fDeb, "high-level expanded=%d\n", expands);

	highlevel_searchexpands += expands;

	return retv;		
}