/*
 * Builds a SimulatorRKCARTPOLE environment and a QFANN<float> function
 * approximator, runs a QLEARNINGXPReplay instance over them for nbrepi
 * iterations, then saves the approximator under filepath.
 *
 * argv[1] (optional): value for lr_, the first constructor argument of the
 * approximator (presumably the learning rate); 5e-2 is used when absent.
 */
int main(int argc, char* argv[])
{
	std::string filepath("CARTPOLENN");
	unsigned int nbrepi = 100;
	float gamma_ = 0.5f;

	float EOE = 5.0f;	//in seconds...
	SimulatorRKCARTPOLE env_(EOE);

	// Default value; overridable from the command line and forwarded to fa_.
	float lr_ = 5e-2f;
	if( argc>1)
	{
		// atof returns a double (0.0 when the text is not a number);
		// the narrowing to float is made explicit.
		lr_ = static_cast<float>(atof(argv[1]));
	}

	float eps_ = 0.1f;
	int dimActionSpace_ = 1;

	QFANN<float> fa_( lr_, eps_, gamma_, dimActionSpace_, filepath);

	// The casts are kept exactly as written: the relationship between these
	// classes and Environment<float> / FA<float> is declared elsewhere.
	QLEARNINGXPReplay instance(nbrepi, gamma_, (Environment<float>*)(&env_), (FA<float>*)&fa_);

	instance.run(nbrepi);

	fa_.save(filepath);
	return 0;
}
Esempio n. 2
0
/*
 * Copy constructor.
 *
 * The message is obtained through dupString(that.what_) instead of sharing
 * that's pointer (presumably a freshly allocated copy, since operator=
 * releases what_ with delete[] -- confirm against dupString).  The error code
 * is copied as is.  env_ is NOT copied from that: the new object always
 * starts with env_ set to 0.
 */
DbException::DbException(const DbException &that)
:	__DB_STD(exception)()
,	what_(dupString(that.what_))	// own copy of the message text
,	err_(that.err_)
,	env_(0)				// deliberately not taken from that
{
}

/*
 * Copy assignment: replaces this object's message with a duplicate of that's
 * message and copies the error code.  env_ is left untouched.
 * Self-assignment is a no-op.  Returns *this.
 */
DbException &DbException::operator = (const DbException &that)
{
	if (this != &that) {
		/*
		 * Duplicate first, release second.  If dupString fails (for
		 * example by throwing on allocation failure), what_ still
		 * points at its old, valid buffer instead of at memory that
		 * has already been freed and would be freed again later.
		 */
		char *copy = dupString(that.what_);
		delete [] what_;
		what_ = copy;
		err_ = that.err_;
	}
	return (*this);
}

/*
 * Copies as much of src as fits into [p, end) and returns the new write
 * position, which never exceeds end.  Does not write a terminator.
 */
static char *appendBounded(char *p, char *end, const char *src)
{
	size_t room = static_cast<size_t>(end - p);
	size_t len = strlen(src);

	if (len > room)
		len = room;
	memcpy(p, src, len);
	return (p + len);
}

/*
 * Builds the message "<prefix>: <description>: <db_strerror(err_)>" and
 * stores a duplicate of it in what_.
 *
 * prefix      - optional leading text; when non-NULL it is followed by ": ".
 * description - optional text; when non-NULL and err_ != 0 it is followed
 *               by ": ".
 * The db_strerror(err_) text is appended only when err_ != 0.
 *
 * The message is truncated to MAX_DESCRIPTION_LENGTH - 1 characters and is
 * always null-terminated, including when no piece is appended (the result is
 * then the empty string).
 *
 * The previous value of what_ is overwritten without being released.
 * NOTE(review): presumably this is only called while what_ does not yet own
 * a buffer (e.g. from constructors) -- confirm before adding a delete here.
 */
void DbException::describe(const char *prefix, const char *description)
{
	char *msgbuf = new char[MAX_DESCRIPTION_LENGTH];
	char *p = msgbuf;
	/* Last usable slot is reserved for the terminator. */
	char *end = msgbuf + MAX_DESCRIPTION_LENGTH - 1;

	if (prefix != NULL) {
		p = appendBounded(p, end, prefix);
		p = appendBounded(p, end, ": ");
	}
	if (description != NULL) {
		p = appendBounded(p, end, description);
		if (err_ != 0)
			p = appendBounded(p, end, ": ");
	}
	if (err_ != 0)
		p = appendBounded(p, end, db_strerror(err_));

	/*
	 * p is always within [msgbuf, end], so terminate unconditionally.
	 * This covers both truncation and the case where nothing was written.
	 */
	*p = '\0';

	what_ = dupString(msgbuf);
	delete [] msgbuf;
}
Esempio n. 3
0
int main(int argc, char* argv[])
{
	std::string filepath("CARTPOLENNACTORCRITIC");
	std::string filepathRSFA(filepath+".FA.txt");
	std::string filepathRSPA(filepath+".PA.txt");
	
	unsigned int nbrthread = 4;
	unsigned int nbrepi = 100;
	float gamma_ = 0.99f;
	
	float EOE = 5.0f;	//in seconds...
	SimulatorRKCARTPOLE env_(EOE);
	
	/*
	2x10 
	float lrPA_ = 1e-2f;
	float lrFA_ = 1e-2f;
	*/
	
	/*
	4x10
	*/
	/* 
	float lrPA_ = 1e-2f;
	float lrFA_ = 1e-2f;
	*/
	
	/*
	4x50
	*/ 
	float lrPA_ = 1e-4f;
	float lrFA_ = 1e-4f;
	/**/
	float eps_ = 0.9f;
	int dimActionSpace_ = 1;
	
	int dimStateSpace_ = 4;
	Topology topoFA;
	unsigned int nbrneuronsFA = 50;
	unsigned int nbrlayerFA = 4;
	unsigned int nbrinputFA = dimActionSpace_+dimStateSpace_;
	unsigned int nbroutputFA = 1;
	topoFA.push_back(nbrinputFA,NTNONE);	//input layer
	//topoFA.push_back(nbrinputFA,NTSIGMOID);	//input layer
	
	//for(int i=nbrlayerFA;i--;)	topoFA.push_back(nbrneuronsFA, NTSIGMOID);
	for(int i=nbrlayerFA;i--;)	topoFA.push_back(nbrneuronsFA, NTTANH);
	
	//topoFA.push_back(nbroutputFA, NTNONE);	//linear output
	//topoFA.push_back(nbroutputFA, NTSIGMOID);	//linear output
	topoFA.push_back(nbroutputFA, NTTANH);	//linear output
	
	QFANN<float> fa_( lrFA_, eps_, gamma_, dimActionSpace_, topoFA, filepathRSFA);
	
	 
	 
	 
	Topology topoPA;
	unsigned int nbrneuronsPA = 50;
	unsigned int nbrlayerPA = 4;
	unsigned int nbrinputPA = dimStateSpace_;
	unsigned int nbroutputPA = dimActionSpace_;
	topoPA.push_back(nbrinputPA,NTNONE);	//input layer
	//topoPA.push_back(nbrinputPA,NTSIGMOID);	//input layer
	
	//for(int i=nbrlayerPA;i--;)	topoPA.push_back(nbrneuronsPA, NTSIGMOID);
	for(int i=nbrlayerPA;i--;)	topoPA.push_back(nbrneuronsPA, NTTANH);
	
	//it would be difficult to get to higher values with a nonlinearity that would reduice the range of possibility, maybe...
	//topoPA.push_back(nbroutputPA, NTNONE);	//linear output
	//topoPA.push_back(nbroutputPA, NTSIGMOID);	//linear output
	topoPA.push_back(nbroutputPA, NTTANH);	//linear output
	
	QPANN<float> pa_( lrPA_, eps_, gamma_, dimActionSpace_, topoPA, filepathRSPA);
	 
	//QLEARNINGXPReplay instance(nbrepi, gamma_, (Environment<float>*)(&env_), (FA<float>*)&fa_);
	//QLEARNINGXPReplayActorCritic instance(nbrepi, gamma_, (Environment<float>*)(&env_), (FA<float>*)&fa_, (PA<float>*)&pa_);
	float momentumUpdate = 1e-4f;
	int freqUpdate = 1;
	DDPGA3C instance(nbrepi, gamma_, (Environment<float>*)(&env_), (FA<float>*)&fa_, (PA<float>*)&pa_, momentumUpdate, freqUpdate);
	
	
	instance.run(nbrepi,nbrthread);
	
	fa_.save(filepath+"FA");
	pa_.save(filepath+"PA");
}