Пример #1
0
/*
 * Release the agent's resources: clear both action structs, hand
 * last_observation to freeRLStructPointer, and free the value-function table.
 *
 * Both owning pointers are reset to 0 afterwards so that no stale pointer to
 * released memory is left behind in the globals.
 */
void agent_cleanup() {
	clearRLStruct(&this_action);
	clearRLStruct(&last_action);

	freeRLStructPointer(last_observation);
	last_observation=0;

	/* free() accepts a null pointer, so no guard is needed. */
	free(value_function);
	value_function=0;
}
Пример #2
0
void agent_end(double reward) {
	int lastState=last_observation->intArray[0];
	int lastAction=last_action.intArray[0];
	
	double Q_sa=value_function[calculateArrayIndex(lastState,lastAction)];
	double new_Q_sa=Q_sa + sarsa_stepsize * (reward - Q_sa);

	/*	Only update the value function if the policy is not frozen */
	if(!policy_frozen){
		value_function[calculateArrayIndex(lastState,lastAction)]=new_Q_sa;
	}
	clearRLStruct(&last_action);
	clearRLStruct(last_observation);
}
Пример #3
0
/*
 * Begin a new episode: reset the per-episode step counter, count the
 * episode, clear the observation o, run the struct check on it and return it.
 */
const observation_t *env_start()
{
	stepCount = 0;
	++episodeCount;

	clearRLStruct(o);
	__RL_CHECK_STRUCT(o)

	return o;
}
Пример #4
0
/*
 * Send the cleanup state over the experiment connection, receive the reply
 * (which must carry the same state), then release the experiment-side
 * structs and the message buffer.
 */
void RL_cleanup() {
	int experimentState = kRLCleanup;

	assert(theExperimentConnection != 0);

	/* Request: a cleared buffer tagged with the cleanup state. */
	rlBufferClear(&clientexp_rlbuffer);
	rlSendBufferData(theExperimentConnection, &clientexp_rlbuffer, experimentState);

	/* Reply: the state read back overwrites experimentState and is checked. */
	rlBufferClear(&clientexp_rlbuffer);
	rlRecvBufferData(theExperimentConnection, &clientexp_rlbuffer, &experimentState);
	assert(experimentState == kRLCleanup);

	clearRLStruct(&clientexp_observation);
	clearRLStruct(&clientexp_action);

	/* Reset the message buffer bookkeeping; free() tolerates a null pointer. */
	clientexp_messagecapacity = 0;
	free(clientexp_message);
	clientexp_message = 0;
}
Пример #5
0
/*
 * Release what the agent holds: clear currentAction, then walk tableQ,
 * removing every entry and releasing its key string and its value array.
 */
void agent_cleanup() {
	clearRLStruct(&currentAction);
	for (CStringUnorderedMap< double* >::iterator iter = tableQ.begin(); iter != tableQ.end();)
	{
		/* Copy both pointers out before the entry is erased. */
		double* values = iter->second;
		char* tmpChar = iter->first;
		/* erase() hands back the iterator to continue from. */
		iter = tableQ.erase(iter);
		/* The key is released with free(), the values with delete[]. */
		free(tmpChar);
		delete[] values;
	}
}
Пример #6
0
/*
 * Advance the environment by one step. Only the opening of the function is
 * visible in this excerpt; the rest of the body is not shown.
 *
 * Visible behaviour: increments stepCount, clears o, and on even-numbered
 * episodes calls set_k_ints_in_abstract_type and
 * set_k_doubles_in_abstract_type on o with 50000 each, flagging the step as
 * terminal once stepCount equals 200. The parameter a is not used in the
 * visible portion.
 */
const reward_observation_terminal_t *env_step(const action_t *a)
{
	int terminal=0;
	stepCount++;
	clearRLStruct(o);
	    
        /*Short episode with big observations*/
        if(episodeCount%2==0){
			/* The struct check runs before and after each fill of o. */
			__RL_CHECK_STRUCT(o)
            set_k_ints_in_abstract_type(o, 50000);
			__RL_CHECK_STRUCT(o)
            set_k_doubles_in_abstract_type(o, 50000);
			__RL_CHECK_STRUCT(o)

            /* The episode is flagged terminal at exactly step 200. */
            if(stepCount==200)terminal=1;
        }
Пример #7
0
/*
 * Handle an agent-cleanup request: run the agent's own cleanup, reset the
 * outgoing buffer, and release the client-side resources.
 * No payload is read for this message and theConnection is not used here.
 */
static void onAgentCleanup(int theConnection) {
	/* Nothing arrives from the server for this message; go straight to the agent. */
	agent_cleanup();

	/* Reset the buffer that carries data back to the server. */
	rlBufferClear(&theBuffer);

	/* Drop client-side state, zeroing each pointer right after it is freed. */
	clearRLStruct(&clientagent_observation);

	free(theTaskSpec);
	theTaskSpec = 0;

	free(clientagent_inmessage);
	clientagent_inmessage = 0;
	clientagent_inmessagecapacity = 0;
}
Пример #8
0
/*
 * Agent initialisation: the task specification is not examined; the only
 * work done is clearing emptyAction.
 */
void agent_init(const char * task_spec) {
    (void)task_spec; /* intentionally unused */
    clearRLStruct(emptyAction);
}
Пример #9
0
/* Environment teardown: passes this_observation to clearRLStruct. */
void env_cleanup() {
	clearRLStruct(&this_observation);
}