//==========================================================
// ANN_Test()
//----------------------------------------------------------
/// Given an input and a target pattern, return the summed squared error between the network's output and the target pattern.
real ANN_Test(ANN * ann, real * x, real * t)
{
	//LISTITEM *p = LastListItem(ann->c);
	//Layer *l = (Layer *) p->obj;
	real sum = 0.0f;
	int j;
	ANN_Input(ann, x);

	for (j = 0; j < ann->n_outputs; j++) {
		//real f = l->f_d(ann->y[j]);
		real e = t[j] - ann->y[j];
		ann->error[j] = e;
		ann->d[j] = 0.0; // e * f in training; deltas are not needed when only testing
		sum += e * e;
	}
	return sum;
}
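
Since each call returns the summed squared error for a single pattern, the mean squared error over a data set is obtained by averaging. Below is a minimal sketch, assuming a hypothetical test set held in arrays xs and ts of n_examples patterns (these names are not part of the original code):

// Hypothetical helper: average the per-pattern squared error returned by
// ANN_Test to obtain the mean squared error over a test set.
real ANN_TestSetMSE(ANN * ann, real ** xs, real ** ts, int n_examples)
{
	real total = 0.0;
	int i;
	for (i = 0; i < n_examples; i++) {
		total += ANN_Test(ann, xs[i], ts[i]);
	}
	return total / (real) n_examples;
}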
Example #2
/// Perform mean square error training, where the aim is to minimise
/// the cost function \f$\sum_i |f(x_i)-t_i|^2\f$, where \f$x_i\f$ is
/// input data, \f$f(\cdot)\f$ is the mapping performed by the neural
/// network, \f$t_i\f$ is the desired output and \f$i\f$ denotes the example
/// index. Under mild assumptions, this is equivalent to minimising
/// \f$E\{|f(X)-T|^2\}\f$, the expected value of the squared error.
real ANN_Train(ANN * ann, real * x, real * t)
{
	LISTITEM *p = LastListItem(ann->c);
	Layer *l = (Layer *) p->obj;
	real sum = 0.0f;
	int j;

	ANN_Input(ann, x);

	for (j = 0; j < ann->n_outputs; j++) {
		real f = l->f_d(ann->y[j]);
		real e = t[j] - ann->y[j];
		ann->error[j] = e;
		ann->d[j] = e * f;
		sum += e * e;
	}

	l->backward(p, ann->d, ann->eligibility_traces, 0.0);

	return sum;
}
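
ANN_Train processes a single pattern, backpropagates the error through the output layer, and returns that pattern's squared error. Below is a minimal epoch-loop sketch for minimising the cost function over a data set; the arrays xs and ts, the epoch limit and the tolerance are assumptions for illustration, not part of the original code:

// Hypothetical epoch loop: present every pattern once per epoch and stop
// when the mean squared error drops below a tolerance.
void ANN_TrainSet(ANN * ann, real ** xs, real ** ts,
                  int n_examples, int max_epochs, real tolerance)
{
	int epoch, i;
	for (epoch = 0; epoch < max_epochs; epoch++) {
		real total = 0.0;
		for (i = 0; i < n_examples; i++) {
			total += ANN_Train(ann, xs[i], ts[i]);
		}
		if (total / (real) n_examples < tolerance) {
			break; // mean squared error is small enough
		}
	}
}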
Example #3
int ANN_Policy::SelectAction (real* s, real r, int forced_a)
{
	int a; // selected action
	int amax; // action whose value is used in the update target
	real* Q_s; // pointer to evaluations for state s
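	// Evaluate Q(s, .): one network per action when separate_actions is set,
	// otherwise a single network with one output per action.  Stochastic
	// evaluation is used when the confidence flag is set.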
	if (confidence) {
		if (separate_actions) {
			for (int i=0; i<n_actions; i++) {
				ANN_StochasticInput (Ja[i], s);
				JQs[i] = ANN_GetOutput(Ja[i])[0];
			}
			Q_s = JQs;
		} else {
			ANN_StochasticInput (J, s);
			Q_s = ANN_GetOutput (J);
		}
	} else {
		if (separate_actions) {
			for (int i=0; i<n_actions; i++) {
				ANN_Input (Ja[i], s);
				JQs[i] = ANN_GetOutput(Ja[i])[0];
			}
			Q_s = JQs;
		} else {
			ANN_Input (J, s);
			Q_s = ANN_GetOutput (J);
		}
	}
	int argmax = argMax (Q_s);

	if (forced_learning) {
		a = forced_a;
	} else if (confidence) {
		a = argmax;
	} else if (smax) {
		a = softMax (Q_s);
		//printf ("Q[%d][%d]=%f\n", s, a, Q[s][a]);
	} else {
		a = eGreedy (Q_s);
	}

	if (a<0 || a>=n_actions) {
		fprintf (stderr, "Action %d out of bounds\n", a);
	}

	switch (learning_method) {
		
	case Sarsa:
		amax = a; // on-policy: the update target uses the action actually taken
		break;
	case QLearning:
		amax = argmax; // off-policy: the update target uses the greedy action
		break;
	default:
		amax = a;
		fprintf (stderr, "Unknown learning method\n");
	}
	if (pa>=0) { // do not update at start of episode
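		// One-step temporal-difference error:
		//   delta = r + gamma * Q(s, amax) - Q(ps, pa),
		// where Q(ps, pa) was stored in J_ps_pa on the previous call.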
		real delta = r + gamma*Q_s[amax] - J_ps_pa;
		tdError = delta;
		for (int j=0; j<n_actions; j++) {
			delta_vector[j] = 0.0;
		}
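		// With eligibility traces, flag the output of the action taken with 1.0
		// and pass the TD error separately; without traces, write the TD error
		// directly into that output's slot.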
		if (separate_actions) {
			if (eligibility) {
				delta_vector[0] = 1.0;
				ANN_Delta_Train (Ja[pa], delta_vector, delta);
				// Reset other actions' traces.
				for (int i=0; i<n_actions; i++) {
					if (i!=pa) {
						ANN_Reset(Ja[i]);
					}
				}
			} else {
				delta_vector[0] = delta;
				ANN_Delta_Train (Ja[pa], delta_vector, 0.0);
			}
		} else {
			if (J->eligibility_traces) {
				delta_vector[pa] = 1.0;
				ANN_Delta_Train (J, delta_vector, delta);
			} else {
				delta_vector[pa] = delta;
				ANN_Delta_Train (J, delta_vector, 0.0);
			}
		}
	}

	//printf ("%d %d #STATE\n", min_el_state, max_el_state);
	//	printf ("Q[%d,%d]=%f r=%f e=%f ad=%f gl=%f #QV\n",
	//			ps, pa, Q[ps][pa], r, e[ps][pa], ad, gl);

	J_ps_pa = Q_s[a]; // remember Q(s, a) for the next call's TD error
	pa = a; // remember the action taken

	return a;
}
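
SelectAction combines action evaluation, exploration (softmax or e-greedy) and a Sarsa or Q-learning temporal-difference update in a single call. Below is a minimal sketch of driving it from an environment loop; the Environment interface and its methods are assumptions for illustration, and the policy's previous-action state (pa) is assumed to be reset elsewhere at the start of each episode:

// Hypothetical interaction loop: only ANN_Policy::SelectAction comes from
// the code above; the Environment interface is assumed for illustration.
void RunEpisode(ANN_Policy * policy, Environment * env)
{
	real r = 0.0; // no reward before the first action
	while (!env->Terminated()) {
		real * s = env->State(); // current state observation
		int a = policy->SelectAction(s, r, -1); // -1: no forced action
		r = env->Act(a); // apply the action and observe the reward
	}
}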