//==========================================================
// ANN_Test()
//----------------------------------------------------------
/// Given an input and a target pattern, perform a forward pass and
/// return the sum of squared errors between the network's output and
/// the target pattern. The deltas are zeroed, so no gradient is
/// propagated and no weights are changed.
real ANN_Test(ANN * ann, real * x, real * t)
{
    real sum = 0.0f;
    int j;
    ANN_Input(ann, x);
    for (j = 0; j < ann->n_outputs; j++) {
        real e = t[j] - ann->y[j];
        ann->error[j] = e;
        ann->d[j] = 0.0;   // no backward pass during testing
        sum += e * e;
    }
    return sum;
}
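//==========================================================
// Example (sketch, not in the original source): averaging
// ANN_Test() over a test set to obtain the mean squared error.
// The flat array layout and the n_in stride parameter are
// illustrative assumptions; only ANN_Test() and ann->n_outputs
// come from this file.
//----------------------------------------------------------
static real ExampleMeanTestError(ANN * ann, real * X, real * T,
                                 int n, int n_in)
{
    real sum = 0.0f;
    int i;
    if (n <= 0) {
        return 0.0f;
    }
    for (i = 0; i < n; i++) {
        // X holds n input patterns of n_in values each;
        // T holds the matching target patterns.
        sum += ANN_Test(ann, &X[i * n_in], &T[i * ann->n_outputs]);
    }
    return sum / (real) n;
}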
//==========================================================
// ANN_Train()
//----------------------------------------------------------
/// Perform mean squared error training, where the aim is to minimise
/// the cost function \f$\sum_i |f(x_i)-t_i|^2\f$, where \f$x_i\f$ is
/// input data, \f$f(\cdot)\f$ is the mapping performed by the neural
/// network, \f$t_i\f$ is the desired output and \f$i\f$ denotes the
/// example index. Under mild assumptions, this is equivalent to
/// minimising \f$E\{|f(X)-T|^2\}\f$, the expected value of the squared
/// error. Returns the squared error for this example.
real ANN_Train(ANN * ann, real * x, real * t)
{
    LISTITEM *p = LastListItem(ann->c);
    Layer *l = (Layer *) p->obj;
    real sum = 0.0f;
    int j;
    ANN_Input(ann, x);
    for (j = 0; j < ann->n_outputs; j++) {
        real f = l->f_d(ann->y[j]);   // derivative of the output activation
        real e = t[j] - ann->y[j];
        ann->error[j] = e;
        ann->d[j] = e * f;            // output delta for backpropagation
        sum += e * e;
    }
    l->backward(p, ann->d, ann->eligibility_traces, 0.0);
    return sum;
}
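//==========================================================
// Example (sketch, not in the original source): repeated epochs
// of stochastic gradient training with ANN_Train(), stopping when
// the mean squared error falls below a tolerance. The dataset
// layout, the n_in stride parameter and the stopping rule are
// illustrative assumptions; construction of the ANN itself is
// not shown here.
//----------------------------------------------------------
static int ExampleTrainToTolerance(ANN * ann, real * X, real * T,
                                   int n, int n_in,
                                   real tolerance, int max_epochs)
{
    int epoch;
    for (epoch = 0; epoch < max_epochs; epoch++) {
        real total = 0.0f;
        int i;
        for (i = 0; i < n; i++) {
            // Each call does a forward pass, stores e * f'(y) in
            // ann->d and backpropagates it through the layers.
            total += ANN_Train(ann, &X[i * n_in], &T[i * ann->n_outputs]);
        }
        if (total / (real) n < tolerance) {
            return epoch + 1;   // converged
        }
    }
    return -1;                  // did not reach the tolerance
}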
//==========================================================
// ANN_Policy::SelectAction()
//----------------------------------------------------------
/// Select an action for state \c s, given the reward \c r received
/// for the previous transition, and perform a temporal-difference
/// update with error \f$\delta = r + \gamma Q(s,a^*) - Q(s_p,a_p)\f$,
/// where \f$(s_p,a_p)\f$ is the previous state-action pair and
/// \f$a^*\f$ is the newly selected action under Sarsa or the greedy
/// action under Q-learning. If \c forced_learning is set, \c forced_a
/// is used instead of the policy's own choice.
int ANN_Policy::SelectAction(real* s, real r, int forced_a)
{
    int a;        // selected action
    int amax;     // action used for the value estimate in the TD update
    real* Q_s;    // pointer to the evaluations for state s

    // Evaluate Q(s, .) with either a stochastic or a deterministic
    // forward pass, using one network per action if requested.
    if (confidence) {
        if (separate_actions) {
            for (int i = 0; i < n_actions; i++) {
                ANN_StochasticInput(Ja[i], s);
                JQs[i] = ANN_GetOutput(Ja[i])[0];
            }
            Q_s = JQs;
        } else {
            ANN_StochasticInput(J, s);
            Q_s = ANN_GetOutput(J);
        }
    } else {
        if (separate_actions) {
            for (int i = 0; i < n_actions; i++) {
                ANN_Input(Ja[i], s);
                JQs[i] = ANN_GetOutput(Ja[i])[0];
            }
            Q_s = JQs;
        } else {
            ANN_Input(J, s);
            Q_s = ANN_GetOutput(J);
        }
    }

    int argmax = argMax(Q_s);

    // Choose the action: forced, greedy, softmax or epsilon-greedy.
    if (forced_learning) {
        a = forced_a;
    } else if (confidence) {
        a = argmax;
    } else if (smax) {
        a = softMax(Q_s);
    } else {
        a = eGreedy(Q_s);
    }

    if (a < 0 || a >= n_actions) {
        fprintf(stderr, "Action %d out of bounds\n", a);
    }

    switch (learning_method) {
    case Sarsa:
        amax = a;        // on-policy: bootstrap on the selected action
        break;
    case QLearning:
        amax = argmax;   // off-policy: bootstrap on the greedy action
        break;
    default:
        amax = a;
        fprintf(stderr, "Unknown learning method\n");
    }

    if (pa >= 0) { // do not update at the start of an episode
        real delta = r + gamma * Q_s[amax] - J_ps_pa;
        tdError = delta;
        for (int j = 0; j < n_actions; j++) {
            delta_vector[j] = 0.0;
        }
        if (separate_actions) {
            if (eligibility) {
                delta_vector[0] = 1.0;
                ANN_Delta_Train(Ja[pa], delta_vector, delta);
                // Reset the other actions' traces.
                for (int i = 0; i < n_actions; i++) {
                    if (i != pa) {
                        ANN_Reset(Ja[i]);
                    }
                }
            } else {
                delta_vector[0] = delta;
                ANN_Delta_Train(Ja[pa], delta_vector, 0.0);
            }
        } else {
            if (J->eligibility_traces) {
                delta_vector[pa] = 1.0;
                ANN_Delta_Train(J, delta_vector, delta);
            } else {
                delta_vector[pa] = delta;
                ANN_Delta_Train(J, delta_vector, 0.0);
            }
        }
    }

    J_ps_pa = Q_s[a];   // remember Q(s, a) for the next update
    pa = a;
    return a;
}
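//==========================================================
// Example (sketch, not in the original source): driving
// ANN_Policy::SelectAction() from an agent-environment loop.
// EnvObserve() and EnvStep() are hypothetical placeholders for
// the caller's environment, and the policy is assumed to have
// been constructed elsewhere with pa < 0, so that no TD update
// happens on the first step of the episode.
//----------------------------------------------------------
// Hypothetical environment interface, assumed for this sketch:
extern void EnvObserve(real * s);   // write the current state into s
extern real EnvStep(int a);         // apply action a, return the reward

static void ExampleEpisode(ANN_Policy * policy, real * s, int T_max)
{
    real r = 0.0f;   // reward for the previous transition
    int t;
    for (t = 0; t < T_max; t++) {
        EnvObserve(s);
        // -1 is a dummy forced action; it is only read when
        // forced_learning is set in the policy.
        int a = policy->SelectAction(s, r, -1);
        r = EnvStep(a);
    }
}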