/* Runs an entire episode. */ int performEpisode(int sd) { // Send the INIT command int rc = sendTokenOnSocket(sd, CMD_START); int terminal; double* pStates = (double*)malloc(numStates * sizeof(double)); double* pActions = (double*)malloc(numActions * sizeof(double)); double reward = 0; if(rc >= 0) { // Get terminality flag rc = getFlagFromSocket(sd, &terminal); } if(rc >= 0) { // Get initial states rc = getStatesFromSocket(sd, pStates); } if(rc >= 0) { if(terminal == 1) { // FIXME The reward here is still undefined... rc = agent_end(reward); } else { // Tell agent to start processing states and query // actions... rc = agent_start(pStates, pActions); while(terminal == 0 && rc >= 0) { // Send the step command with the new state rc = sendTokenOnSocket(sd, CMD_STEP); if(rc < 0) break; rc = sendActionsOnSocket(sd, pActions); if(rc < 0) break; // Get the reward, terminality flag and new state of // this step rc = getDoubleFromSocket(sd, &reward); if(rc < 0) break; rc = getFlagFromSocket(sd, &terminal); if(rc < 0) break; rc = getStatesFromSocket(sd, pStates); if(rc < 0) break; // If we are not in a terminal state, tell agent to step if(terminal == 0) rc = agent_step(pStates, reward, pActions); } if(rc >= 0) { // We have reached the terminal state (since there was // no error to get us out of the loop) rc = agent_end(reward); } } } return rc; }
/*
 * Handler for the "agent end" message.
 *
 * Extracts the final reward from the shared buffer, forwards it to
 * agent_end(), and then clears the buffer so it can be reused for the reply.
 * The `theConnection` argument is not used by this handler.
 */
static void onAgentEnd(int theConnection)
{
    double finalReward;

    finalReward = 0;

    /* Pull the reward (a single double) sent by the server out of the buffer */
    rlBufferRead(&theBuffer, 0, &finalReward, 1, sizeof(double));

    /* Hand the received reward to the RL agent */
    agent_end(finalReward);

    /* Reset the buffer so data can be sent back to the server */
    rlBufferClear(&theBuffer);
}
void SimulationLoop() { glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); glMatrixMode(GL_MODELVIEW); glLoadIdentity(); test->SetTextLine(30); settings.hz = settingsHz; //////////////////////////////////////////////////////////////////////////////////////////// if(start==0){ last_state_main = env_start(); last_action_main = agent_start(last_state_main); start = 1; }else{ env_step1(last_action_main); test->Step(&settings); ro_main=env_step2(); if(ro_main->terminal == 1){ agent_end(ro_main->reward); start = 0; env_reset(); if(ro_main->reward==100){ success++; printf("success %d\tfail %d\tprob %lf\n",success,fail,(double)success/(double)(success+fail)*100); }else{ fail++; } //test->Step(&settings); }else{ last_action_main = agent_step(ro_main->reward, ro_main->observation); } } //////////////////////////////////////////////////////////////////////////////////////////// /*test->Step(&settings);*/ //////////////////////////////////////////////////////////////////////////////////////////// /*ro = env_step2(); this_reward = ro->reward; last_state = ro->observation; roa.reward = ro->reward; roa.observation = ro->observation; roa.terminal = ro->terminal;*/ //////////////////////////////////////////////////////////////////////////////////////////// test->DrawTitle(5, 15, entry->name); glutSwapBuffers(); if (testSelection != testIndex) { testIndex = testSelection; delete test; entry = g_testEntries + testIndex; test = entry->createFcn(); viewZoom = 1.0f; viewCenter.Set(0.0f, 20.0f); Resize(width, height); } }