//--------------------------------------------------------------
// Copy map (of GridWorld) to maze (of LPA*).
// Note: x holds the column index and y holds the row index.
void copyDisplayMapToMaze(GridWorld &gWorld, LpaStar* lpa){
    for(int i=0; i < gWorld.getGridWorldRows(); i++){
        for(int j=0; j < gWorld.getGridWorldCols(); j++){
            lpa->maze[i][j].type = gWorld.map[i][j].type;
            lpa->maze[i][j].x = gWorld.map[i][j].col;
            lpa->maze[i][j].y = gWorld.map[i][j].row;

            //lpa->maze[i][j].g = gWorld.map[i][j].g;
            //lpa->maze[i][j].rhs = gWorld.map[i][j].rhs;
        }
    }

    vertex startV = gWorld.getStartVertex();
    vertex goalV = gWorld.getGoalVertex();

    //lpa->start->g = gWorld.map[startV.row][startV.col].g;
    //lpa->start->rhs = gWorld.map[startV.row][startV.col].rhs;
    lpa->start->x = gWorld.map[startV.row][startV.col].col;
    lpa->start->y = gWorld.map[startV.row][startV.col].row;

    //lpa->goal->g = gWorld.map[goalV.row][goalV.col].g;
    //lpa->goal->rhs = gWorld.map[goalV.row][goalV.col].rhs;
    lpa->goal->x = gWorld.map[goalV.row][goalV.col].col;
    lpa->goal->y = gWorld.map[goalV.row][goalV.col].row;
}
int main() {
    GridWorld world;

    // This is optional, and should make solving the model almost instantaneous.
    // Unfortunately, since our main model is so big, the copying process
    // still takes a lot of time. But at least that would be a one-time cost!
    std::cout << currentTimeString() << "- Copying model...!\n";
    AIToolbox::MDP::SparseModel model(world);

    std::cout << currentTimeString() << "- Init solver...!\n";

    // This is a method that solves MDPs completely. It has a couple of
    // parameters available.
    //
    // The only non-optional parameter is the horizon of the solution; that is,
    // how many steps the solution should look ahead in order to decide which
    // move to take. If we chose 1, for example, the tiger would only consider
    // cells next to it to decide where to move; this probably wouldn't be
    // what we want.
    //
    // We want the tiger to think for infinite steps: this can be
    // approximated with a very high horizon, since in theory the final solution
    // will converge to a single policy anyway. Thus we put a very high number
    // as the horizon here.
    AIToolbox::MDP::ValueIteration<decltype(model)> solver(1000000);

    std::cout << currentTimeString() << "- Starting solver!\n";

    // This is where the magic happens. This could take around 10-20 minutes,
    // depending on your machine (most of the time is spent on this tutorial's
    // code, however, since it is a pretty inefficient implementation).
    // But you can play with it and make it better!
    //
    // If you are using the SparseModel though, it is instantaneous, since
    // Eigen is very efficient at computing the values we need!
    auto solution = solver(model);

    std::cout << currentTimeString() << "- Problem solved? " << std::get<0>(solution) << "\n";

    AIToolbox::MDP::Policy policy(world.getS(), world.getA(), std::get<1>(solution));

    std::cout << "Printing best actions when prey is in (5,5):\n\n";
    for ( int y = 10; y >= 0; --y ) {
        for ( int x = 0; x < 11; ++x ) {
            std::cout << policy.sampleAction( encodeState(CoordType{{x, y, 5, 5}}) ) << " ";
        }
        std::cout << "\n";
    }

    std::cout << "\nSaving policy to file for later usage...\n";
    {
        // You can load up this policy again using ifstreams.
        // You will never need to solve the model again, and you
        // can embed the policy into any application you want!
        std::ofstream output("policy.txt");
        output << policy;
    }

    return 0;
}
int main() {
    GridWorld world;

    // This is a method that solves MDPs completely. It has a couple of
    // parameters available, but in our case the defaults are perfectly
    // fine.
    AIToolbox::MDP::ValueIteration solver;

    std::cout << "Starting solver!\n";
    // This is where the magic happens. This could take around 10-20 minutes,
    // depending on your machine (most of the time is spent on this tutorial's
    // code, however, since it is a pretty inefficient implementation).
    // But you can play with it and make it better!
    auto solution = solver(world);

    std::cout << "Problem solved? " << std::get<0>(solution) << "\n";

    AIToolbox::MDP::Policy policy(world.getS(), world.getA(), std::get<1>(solution));

    std::cout << "Printing best actions when prey is in (5,5):\n\n";
    for ( int y = 10; y >= 0; --y ) {
        for ( int x = 0; x < 11; ++x ) {
            std::cout << policy.sampleAction( encodeState(CoordType{{x, y, 5, 5}}) ) << " ";
        }
        std::cout << "\n";
    }

    std::cout << "\nSaving policy to file for later usage...\n";
    {
        // You can load up this policy again using ifstreams.
        // You will never need to solve the model again, and you
        // can embed the policy into any application you want!
        std::ofstream output("policy.txt");
        output << policy;
    }

    return 0;
}
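// --------------------------------------------------------------------------
// A minimal sketch (not part of the original tutorials) of reloading the
// policy saved above. It assumes AIToolbox::MDP::Policy can be constructed
// from just the state and action space sizes, and that it supports stream
// extraction (operator>>) symmetric to the operator<< used for saving;
// verify both against your AIToolbox version.
// --------------------------------------------------------------------------
#include <fstream>

AIToolbox::MDP::Policy loadSavedPolicy(const GridWorld & world) {
    // Start from a default policy of the right dimensions...
    AIToolbox::MDP::Policy policy(world.getS(), world.getA());

    // ...then overwrite it with the values stored on disk.
    std::ifstream input("policy.txt");
    input >> policy;   // assumed to mirror the "output << policy" above

    return policy;
}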
//--------------------------------------------------------------
// Copy maze (from LPA*) to map (of GridWorld)
void copyMazeToDisplayMap(GridWorld &gWorld, LpaStar* lpa){
    for(int i=0; i < gWorld.getGridWorldRows(); i++){
        for(int j=0; j < gWorld.getGridWorldCols(); j++){
            gWorld.map[i][j].type = lpa->maze[i][j].type;
            gWorld.map[i][j].h    = lpa->maze[i][j].h;
            gWorld.map[i][j].g    = lpa->maze[i][j].g;
            gWorld.map[i][j].rhs  = lpa->maze[i][j].rhs;
            gWorld.map[i][j].row  = lpa->maze[i][j].y;
            gWorld.map[i][j].col  = lpa->maze[i][j].x;
            for(int k=0; k < 2; k++){
                gWorld.map[i][j].key[k] = lpa->maze[i][j].key[k];
            }
        }
    }

    // Copy the values of the start vertex.
    gWorld.map[lpa->start->y][lpa->start->x].h   = lpa->start->h;
    gWorld.map[lpa->start->y][lpa->start->x].g   = lpa->start->g;
    gWorld.map[lpa->start->y][lpa->start->x].rhs = lpa->start->rhs;
    gWorld.map[lpa->start->y][lpa->start->x].row = lpa->start->y;
    gWorld.map[lpa->start->y][lpa->start->x].col = lpa->start->x;
    for(int k=0; k < 2; k++){
        gWorld.map[lpa->start->y][lpa->start->x].key[k] = lpa->start->key[k];
    }

    // Copy the values of the goal vertex.
    gWorld.map[lpa->goal->y][lpa->goal->x].h   = lpa->goal->h;
    gWorld.map[lpa->goal->y][lpa->goal->x].g   = lpa->goal->g;
    gWorld.map[lpa->goal->y][lpa->goal->x].rhs = lpa->goal->rhs;
    gWorld.map[lpa->goal->y][lpa->goal->x].row = lpa->goal->y;
    gWorld.map[lpa->goal->y][lpa->goal->x].col = lpa->goal->x;
    for(int k=0; k < 2; k++){
        gWorld.map[lpa->goal->y][lpa->goal->x].key[k] = lpa->goal->key[k];
    }
}
void testReinforcement() {
    GridWorld g;
    TDLearning(g);
    g.debug();
}
inline AIToolbox::MDP::Model makeCliffProblem(const GridWorld & grid) {
    using namespace AIToolbox::MDP;

    size_t S = grid.getSizeX() * grid.getSizeY() + 2, A = 4;

    AIToolbox::Table3D transitions(boost::extents[S][A][S]);
    AIToolbox::Table3D rewards(boost::extents[S][A][S]);

    double failReward = -100.0, stepReward = -1.0, winReward = 0.0;

    // Default all transitions within the grid to be deterministic,
    // and give negative reward. Remember that the actual cliff is
    // under the grid.
    for ( size_t s = 0; s < S-2; ++s ) {
        for ( size_t a = 0; a < A; ++a ) {
            auto cell = grid(s);
            cell.setAdjacent((Direction)a);
            transitions[s][a][cell] = 1.0;
            rewards[s][a][cell] = stepReward;
        }
    }

    // Attach start and goal states
    size_t start = S - 2, goal = S - 1;
    size_t upStart = (grid.getSizeY() - 1) * grid.getSizeX();
    size_t upGoal  = S - 3;

    // Fix start
    transitions[start][UP   ][upStart] = 1.0;
    rewards    [start][UP   ][upStart] = stepReward;
    transitions[start][LEFT ][start  ] = 1.0;
    rewards    [start][LEFT ][start  ] = stepReward;
    transitions[start][DOWN ][start  ] = 1.0;
    rewards    [start][DOWN ][start  ] = stepReward;
    transitions[start][RIGHT][start  ] = 1.0;
    rewards    [start][RIGHT][start  ] = failReward; // This goes into the cliff

    // Fix down for upStart
    transitions[upStart][DOWN][upStart] = 0.0;
    rewards    [upStart][DOWN][upStart] = 0.0;
    transitions[upStart][DOWN][start  ] = 1.0;
    rewards    [upStart][DOWN][start  ] = stepReward;

    // Fix goal (self-absorbing)
    transitions[goal][UP   ][goal] = 1.0;
    transitions[goal][LEFT ][goal] = 1.0;
    transitions[goal][DOWN ][goal] = 1.0;
    transitions[goal][RIGHT][goal] = 1.0;

    // Fix upGoal
    transitions[upGoal][DOWN][upGoal] = 0.0;
    rewards    [upGoal][DOWN][upGoal] = 0.0;
    transitions[upGoal][DOWN][goal  ] = 1.0;
    rewards    [upGoal][DOWN][goal  ] = winReward; // Won!

    // Fix cliff edge
    for ( size_t s = upStart + 1; s < upGoal; ++s ) {
        transitions[s][DOWN][s    ] = 0.0;
        rewards    [s][DOWN][s    ] = 0.0;
        transitions[s][DOWN][start] = 1.0;
        rewards    [s][DOWN][start] = failReward; // This goes into the cliff
    }

    Model model(S, A, transitions, rewards, 1.0);
    return model;
}
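// --------------------------------------------------------------------------
// Hedged usage sketch (not in the original sources): solving the cliff
// problem with the same ValueIteration interface used in the tutorials
// above. The GridWorld (sizeX, sizeY) constructor is an assumption, and
// some AIToolbox versions template ValueIteration on the model type (as in
// the SparseModel tutorial earlier); adjust to your library version.
// --------------------------------------------------------------------------
#include <iostream>
#include <tuple>

void solveCliffExample() {
    GridWorld grid(12, 4);                         // assumed (sizeX, sizeY) constructor
    auto model = makeCliffProblem(grid);

    AIToolbox::MDP::ValueIteration solver(10000);  // a high horizon approximates infinity
    auto solution = solver(model);

    // Mirrors the "Problem solved?" check used in the tutorials above.
    std::cout << "Converged? " << std::get<0>(solution) << "\n";
}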
void runSimulation(char *fileName){
    WorldBoundaryType worldBoundary; //duplicated in GridWorld
    DevBoundaryType deviceBoundary;  //duplicated in GridWorld
    bool ANIMATE_MOUSE_FLAG=false;
    bool validCellSelected=false;
    static BOOL page=false;
    int mX, mY;
    float worldX, worldY;
    worldX=0.0f;
    worldY=0.0f;
    int action=-1;

    //-----------------------
    CellPosition p;
    int rowSelected, colSelected;
    //-----------------------
    rowSelected=-1;
    colSelected=-1;

    int mouseRadius=1;

    srand(time(NULL)); // Seed the random number generator

    // Initialise the world boundaries
    grid_world.initSystemOfCoordinates();
    grid_world.loadMapAndDisplay(fileName);
    grid_world.initialiseMapConnections();

    //----------------------------------------------------------------
    //LPA*
    lpa_star = new LpaStar(grid_world.getGridWorldRows(), grid_world.getGridWorldCols());

    vertex start = grid_world.getStartVertex();
    vertex goal = grid_world.getGoalVertex();
    cout << "(start.col = " << start.col << ", start.row = " << start.row << ")" << endl;
    cout << "(goal.col = " << goal.col << ", goal.row = " << goal.row << ")" << endl;

    lpa_star->initialise(start.col, start.row, goal.col, goal.row);

    //lpa_star->copyMazeToDisplayMap(grid_world);
    //copyMazeToDisplayMap(grid_world, lpa_star);
    copyDisplayMapToMaze(grid_world, lpa_star);
    //----------------------------------------------------------------

    worldBoundary = grid_world.getWorldBoundary();
    deviceBoundary = grid_world.getDeviceBoundary();
    GRIDWORLD_ROWS = grid_world.getGridWorldRows();
    GRIDWORLD_COLS = grid_world.getGridWorldCols();

    //setvisualpage(page);

    // Keep running the program until the ESC key is pressed
    while((GetAsyncKeyState(VK_ESCAPE)) == 0 ) {
        setactivepage(page);
        cleardevice();

        action = getKey();

        if(SHOW_MAP_DETAILS)
            grid_world.displayMapWithDetails();
        else
            grid_world.displayMap();

        switch(action){
            case 1: // Block selected cell
                if( rowSelected != -1 && colSelected != -1){
                    grid_world.setMapTypeValue(rowSelected-1, colSelected-1, '1');
                    grid_world.initialiseMapConnections();
                    rowSelected=-1;
                    colSelected=-1;
                }
                action = -1;
                break;

            case 105:
                grid_world.displayMapWithKeyDetails();
                break;

            case 106:
                //~ algorithmSelection = ASTAR_ALGORITHM;
                break;

            case 107:
                //~ algorithmSelection = LPASTAR_ALGORITHM;
                break;

            case 108:
                //~ algorithmSelection = DSTAR_ALGORITHM;
                break;

            case 15: // Display connections of the selected vertex
                if( rowSelected != -1 && colSelected != -1){
                    grid_world.displayVertexConnections(colSelected-1, rowSelected-1);
                    //cout << "display connections" << endl;
                    rowSelected=-1;
                    colSelected=-1;
                } else {
                    cout << "no cell selected, please select a cell first." << endl;
                    break;
                }
                //--------------------------------------------
                action = -1;
                break;

            case 16:
                if(grid_world.isGridMapInitialised()){
                    grid_world.displayMapConnections();
                    //cout << "display connections" << endl;
                    //~ rowSelected=-1;
                    //~ colSelected=-1;
                } else {
                    cout << "map has not been initialised yet." << endl;
                    break;
                }
                //--------------------------------------------
                action = -1;
                break;

            case 6: // Set cell as new START vertex
            {
                //--------------------------------------------
                // Retrieve current START vertex
                vertex s = grid_world.getStartVertex();

                if( (s.row != -1) && (s.col != -1) ){
                    // Set current START VERTEX to an ordinary TRAVERSABLE CELL
                    grid_world.setMapTypeValue(s.row, s.col, '0');
                    grid_world.initialiseMapConnections();
                    // ok, proceed
                } else {
                    cout << "invalid START vertex" << endl;
                    break;
                }
                //--------------------------------------------
                // Set selected cell as the NEW START VERTEX
                if( rowSelected != -1 && colSelected != -1){
                    grid_world.setMapTypeValue(rowSelected-1, colSelected-1, '6');
                    s.row = rowSelected-1;
                    s.col = colSelected-1;
                    grid_world.setStartVertex(s);
                    rowSelected=-1;
                    colSelected=-1;
                } else {
                    cout << "invalid new START vertex, please select a new START vertex first." << endl;
                    break;
                }
                //--------------------------------------------
                action = -1;
                break;
            }

            case 7: // Set cell as new GOAL vertex
            {
                //--------------------------------------------
                // Retrieve current GOAL vertex
                vertex s = grid_world.getGoalVertex();

                if( (s.row != -1) && (s.col != -1) ){
                    // Set current GOAL VERTEX to an ordinary TRAVERSABLE CELL
                    grid_world.setMapTypeValue(s.row, s.col, '0');
                    // ok, proceed
                } else {
                    cout << "invalid GOAL vertex" << endl;
                    action = -1;
                    break;
                }
                //--------------------------------------------
                // Set selected cell as the NEW GOAL VERTEX
                if( rowSelected != -1 && colSelected != -1){
                    grid_world.setMapTypeValue(rowSelected-1, colSelected-1, '7');
                    s.row = rowSelected-1;
                    s.col = colSelected-1;
                    grid_world.setGoalVertex(s);
                    grid_world.initialiseMapConnections();
                    rowSelected=-1;
                    colSelected=-1;
                } else {
                    cout << "invalid new GOAL vertex, please select a new GOAL vertex first." << endl;
                    action = -1;
                    break;
                }
                //--------------------------------------------
                action = -1;
                break;
            }

            case 109:
                copyDisplayMapToMaze(grid_world, lpa_star);
                cout << "copied display map to algorithm's maze" << endl;
                action = -1;
                break;

            case 110:
                lpa_star->updateHValues();
                copyMazeToDisplayMap(grid_world, lpa_star);
                cout << "copied algorithm's maze to display map" << endl;
                action = -1;
                break;

            case 9: // Display g-values only
                grid_world.displayMapWithSelectedDetails(true, false, false, false);
                //(bool display_g, bool display_rhs, bool display_h, bool display_key)
                action = -1;
                break;

            case 10: // Display h-values only
                grid_world.displayMapWithSelectedDetails(false, false, true, false);
                //(bool display_g, bool display_rhs, bool display_h, bool display_key)
                action = -1;
                break;

            case 11: // Display key-values only
                lpa_star->updateAllKeyValues();
                copyMazeToDisplayMap(grid_world, lpa_star);
                grid_world.displayMapWithSelectedDetails(false, false, false, true);
                //(bool display_g, bool display_rhs, bool display_h, bool display_key)
                action = -1;
                break;

            case 12: // Make cell traversable
                if( rowSelected != -1 && colSelected != -1){
                    grid_world.setMapTypeValue(rowSelected-1, colSelected-1, '0');
                    rowSelected=-1;
                    colSelected=-1;
                }
                action = -1;
                break;

            case 14:
                grid_world.displayMapWithPositionDetails();
                action = -1;
                break;

            //~ default: // Display grid without details
                //~ grid_world.displayMap();
        };

        //----------------------------------------------------------------
        // Mouse handling
        //
        if(mousedown()){
            ANIMATE_MOUSE_FLAG=true;

            mX = mousecurrentx();
            mY = mousecurrenty();

            // If the goal selected is within the playing field boundaries
            if(mX >= grid_world.getFieldX1() && mX <= grid_world.getGridMaxX() &&
               mY >= grid_world.getFieldY1() && mY <= grid_world.getGridMaxY()){
                circle(mX, mY, 3);
                validCellSelected = true;
            } else {
                validCellSelected = false;
            }
        } //end of mousedown()
        //----------------------------------------------------------------

        if(ANIMATE_MOUSE_FLAG){
            // Draw cross-hair to mark goal
            setcolor(RED);
            circle(mX, mY, 20);
            line(mX,mY-20,mX,mY+20);
            line(mX-20,mY,mX+20,mY);
            // end of draw cross-hair

            // Special effect: concentric circles locating the target
            setcolor(YELLOW);
            if(mouseRadius < 40) {
                mouseRadius += 1;
            }
            circle(mX, mY, mouseRadius);
            //Sleep(50);

            if(mouseRadius >= 40) {
                ANIMATE_MOUSE_FLAG=false;
                mouseRadius=0;
            }
            // end of special effect
        }

        char info[80];
        float wX, wY;

        wX = xWorld(worldBoundary,deviceBoundary,mX);
        wY = yWorld(worldBoundary,deviceBoundary,mY);

        sprintf(info,"x: %d, y: %d",mX, mY);
        drawInformationPanel(grid_world.getFieldX2(), grid_world.getFieldY1() + textheight("H")*2, info);

        sprintf(info,"wX: %3.0f, wY: %3.0f",wX, wY);
        drawInformationPanel(grid_world.getFieldX2(), grid_world.getFieldY1() + textheight("H")*5, info);

        //~ CellPosition p;
        //~ int rowSelected, colSelected;
        if(validCellSelected) {
            p = grid_world.getCellPosition_markCell(mX, mY);
            rowSelected = p.row;
            colSelected = p.col;
            sprintf(info,"row: %d, col: %d",rowSelected, colSelected);
            drawInformationPanel(grid_world.getFieldX2(), grid_world.getFieldY1() + textheight("H")*6, info);
        }

        setvisualpage(page);
        page = !page; // Switch to the other page
    }
}
int main(int argc, char* argv[]) {
    clock_t begin_main = GetTime();
    clock_t end_main;

    bool is_verbose = false;
#ifdef OUTPUT
    is_verbose = true;
#endif

    float discount_factor = 0.0; /* gamma */
    float max_reward = 0.0;
    float weight = 0.0;
    char algorithm = 0;
    char opt = 0;
    unsigned int NUM_ACTIONS = 4;
    unsigned int NUM_AGENTS = 2;
    GridWorld *environment = NULL;
    VariablesInfo* stateDescription = NULL;
    int next_option = 0;
    int num_options = 0;

    /* A string listing the valid short option letters:
       h = help
       a = algorithm (C = CE-VI, F = Friend-VI, M = MiniMax-VI, T = TB-VI)
       o = algorithm option (u, e, r)
       w = weight
       g = gamma (discount factor)
       e = environment (T = grid; C, P, X, G, A = grid with walls)
       Note: 'h' takes no argument, matching the long_options table below. */
    const char* const short_options = "ha:o:w:g:e:";

    /* An array describing valid long options (see above). */
    const struct option long_options[] = {
        {"help",        0, NULL, 'h'},
        {"algorithm",   1, NULL, 'a'},
        {"option",      1, NULL, 'o'},
        {"weight",      1, NULL, 'w'},
        {"gamma",       1, NULL, 'g'},
        {"environment", 1, NULL, 'e'},
        {NULL,          0, NULL, 0}   /* Required at end of array. */
    };

    srand(time(NULL));

    vector<VI*> agents;
    vector<string> agent_names;

    do {
        next_option = getopt_long(argc, argv, short_options, long_options, NULL);
        switch (next_option) {
            case 'h': { /* -h or --help */
                PrintUsage("Help message", EXIT_OK);
            }
            case 'a': { /* -a or --algorithm --> learning algorithm for self play (C, F, M, T) */
                if (*optarg == 'C' || *optarg == 'F' || *optarg == 'M' || *optarg == 'T')
                    algorithm = *optarg;
                else
                    PrintUsage("Invalid algorithm, only 'C' or 'F' or 'M' or 'T' are allowed!", BAD_USAGE);
                //cout << "Algorithm: " << algorithm << endl;
                num_options++;
                break;
            }
            case 'o': { /* -o or --option --> algorithm option (u, e, r) */
                if (*optarg == 'u' || *optarg == 'e' || *optarg == 'r')
                    opt = *optarg;
                else
                    PrintUsage("Invalid algorithm option, only 'u' or 'e' or 'r' are allowed!", BAD_USAGE);
                num_options++;
                break;
            }
            case 'w': { /* -w or --weight */
                weight = atof(optarg);
                if (weight < 0 || weight > 1)
                    PrintUsage("Invalid weight, it must be between 0 and 1", BAD_USAGE);
                //cout << "Weight: " << weight << endl;
                num_options++;
                break;
            }
            case 'g': { /* -g or --gamma --> discount factor */
                discount_factor = atof(optarg);
                if (discount_factor < 0 || discount_factor > 1)
                    PrintUsage("Invalid discount_factor, it must be between 0 and 1", BAD_USAGE);
                //cout << "Gamma: " << discount_factor << endl;
                num_options++;
                break;
            }
            case 'e': { /* -e or --environment --> chosen environment */
                if (*optarg == 'T') {
                    if (NUM_AGENTS >= 8)
                        PrintUsage("Too many agents for the grid environment; maximum allowed = 7\n", BAD_USAGE);
                    environment = new GridWorld(*optarg);
                }
                else if (*optarg == 'C' || *optarg == 'P' || *optarg == 'X' || *optarg == 'G' || *optarg == 'A')
                    environment = new GridWorldWall(*optarg);
                else
                    PrintUsage("Invalid environment", BAD_USAGE);
                //cout << "Environment: " << *optarg << endl;
                num_options++;
                break;
            }
            case '?': { /* The user specified an invalid option. */
                PrintUsage(string("Unknown option '").append(optarg ? optarg : "").append("'"), BAD_USAGE);
            }
            case -1: { /* Done with options. */
                if (num_options != 5)
                    PrintUsage("Mandatory options not present", BAD_USAGE);
                break;
            }
            default: /* Something else unexpected. */
                abort();
        }
    } while (next_option != -1);

    VariablesInfo* admissible_actions = new VariablesInfo();
    CE_VI* agent = NULL;

    AddAgentNames(&agent_names);
    AddAgentActions(NUM_ACTIONS, admissible_actions);

    // Set the initial state of the grid environment, with a random position
    // for every agent. It is then passed as an argument to each agent...
    stateDescription = SetInitialState(NUM_AGENTS);
    environment->Init(stateDescription);
    //stateDescription = environment->GetStateDescription();
    //environment->AppendInit(AppendToInitialState(stateDescription,NUM_AGENTS));

    cout << "****" << endl;

    if (algorithm == 'C' || algorithm == 'F' || algorithm == 'M' || algorithm == 'T') {
        cout << "Creating VI..." << endl
             << "algorithm: " << algorithm << " option: " << opt << endl;
        agent = new CE_VI(agent_names[0], 0, NUM_AGENTS, admissible_actions,
                          discount_factor, max_reward, is_verbose, algorithm,
                          opt, weight, environment);
        if (agent == NULL) {
            cerr << "main: VI creation NULL!" << endl;
            exit(NO_MEM_AVAILABLE);
        }
    }
    else {
        cout << "The specified algorithm is not available" << endl;
        cout << "The only algorithms available are:" << endl;
        cout << "'C': CE-VI" << endl;
        cout << "'F': Friend-VI" << endl;
        cout << "'M': MiniMax-VI" << endl;
        cout << "'T': TB-VI" << endl;
        exit(1);
    }

    agent->SetTransitionModel(environment->GetTransitionModel());

    // FOR DEBUG
    /*vector<unsigned> vState;
    for (unsigned i = 0; i < 81; i++) {
        cout << "State " << i;
        vState = environment->GetFromIndex(i, 4, 3);
        cout << ": " << vState[0] << "," << vState[1] << "," << vState[2] << "," << vState[3] << endl;
        cout << "<-state:" << environment->ComputeJointIndex(vState, 3) << endl;
    }*/

    agent->Execute(); // select and execute action
    //agent->BuildConvexHull();

    delete stateDescription;
    stateDescription = NULL;

    //test_environment(environment);
    //cout << "TOTAL PAYOFF --> G = " << endl << endl;

    /*for (i = 0; i < agents.size(); i++) {
        cout << agents[i]->PrintQTable();
    }*/

    delete admissible_actions;
    delete environment; // why on earth does this segfault?!?

    /*for (i = 0; i < agents.size(); i++) {
        cout << "main: delete agent " << i << endl;
        flush(cout);
        delete agents[i];
    }*/

    end_main = GetTime();
    cout << ">>> main.cpp: elapsed time = "
         << (double)(end_main - begin_main) / CLOCKS_PER_SEC << " seconds" << endl;

    return 0;
}
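// --------------------------------------------------------------------------
// Example invocation (hypothetical binary name), derived solely from the
// option parsing above. All five of -a, -o, -w, -g and -e are mandatory,
// since num_options must equal 5:
//
//     ./ce_vi -a C -o u -w 0.5 -g 0.9 -e G
//
// This selects the CE-VI algorithm ('C') with option 'u', weight 0.5,
// discount factor 0.9, and the 'G' walled-grid environment (GridWorldWall).
// --------------------------------------------------------------------------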