int fann_train_outputs(struct fann *ann, struct fann_train_data *data, float desired_error)
{
    float error, initial_error, error_improvement;
    float target_improvement = 0.0;
    float backslide_improvement = -1.0e20f;
    unsigned int i;
    unsigned int max_epochs = ann->cascade_max_out_epochs;
    unsigned int min_epochs = ann->cascade_min_out_epochs;
    unsigned int stagnation = max_epochs;

    /* TODO should perhaps not clear all arrays */
    fann_clear_train_arrays(ann);

    /* run an initial epoch to set the initial error */
    initial_error = fann_train_outputs_epoch(ann, data);

    if(fann_desired_error_reached(ann, desired_error) == 0)
        return 1;

    for(i = 1; i < max_epochs; i++)
    {
        error = fann_train_outputs_epoch(ann, data);

        /*printf("Epoch %6d. Current error: %.6f. Bit fail %d.\n", i, error, ann->num_bit_fail); */

        if(fann_desired_error_reached(ann, desired_error) == 0)
        {
#ifdef CASCADE_DEBUG
            printf("Error %f < %f\n", error, desired_error);
#endif
            return i + 1;
        }

        /* Improvement since start of train */
        error_improvement = initial_error - error;

        /* After any significant change, set a new goal and
         * allow a new quota of epochs to reach it */
        if((target_improvement >= 0 &&
            (error_improvement > target_improvement || error_improvement < backslide_improvement)) ||
           (target_improvement < 0 &&
            (error_improvement < target_improvement || error_improvement > backslide_improvement)))
        {
            /*printf("error_improvement=%f, target_improvement=%f, backslide_improvement=%f, stagnation=%d\n", error_improvement, target_improvement, backslide_improvement, stagnation); */

            target_improvement = error_improvement * (1.0f + ann->cascade_output_change_fraction);
            backslide_improvement = error_improvement * (1.0f - ann->cascade_output_change_fraction);
            stagnation = i + ann->cascade_output_stagnation_epochs;
        }

        /* No improvement in allotted period, so quit */
        if(i >= stagnation && i >= min_epochs)
        {
            return i + 1;
        }
    }

    return max_epochs;
}
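/*
 * fann_train_outputs is internal: the public cascade trainer calls it once per
 * candidate round, and the stagnation window above is what usually ends each
 * round. A minimal usage sketch of that public entry point, assuming the
 * standard FANN 2.x cascade API; the file name "train.data" and all parameter
 * values are illustrative placeholders:
 */
static void cascade_example(void)
{
    /* cascade training starts from a two-layer shortcut net and grows hidden neurons */
    struct fann *ann = fann_create_shortcut(2, 2, 1);
    struct fann_train_data *data = fann_read_train_from_file("train.data");

    /* these two parameters drive the stagnation logic in fann_train_outputs */
    fann_set_cascade_output_change_fraction(ann, 0.01f);
    fann_set_cascade_output_stagnation_epochs(ann, 12);

    fann_cascadetrain_on_data(ann, data, 30 /* max neurons */, 1 /* report interval */,
                              0.001f /* desired error */);

    fann_destroy_train(data);
    fann_destroy(ann);
}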
/*
 * Internal train function
 */
float fann_train_epoch_sarprop(struct fann *ann, struct fann_train_data *data)
{
    unsigned int i;

    if(ann->prev_train_slopes == NULL)
    {
        fann_clear_train_arrays(ann);
    }

    fann_reset_MSE(ann);

    for(i = 0; i < data->num_data; i++)
    {
        fann_run(ann, data->input[i]);
        fann_compute_MSE(ann, data->output[i]);
        fann_backpropagate_MSE(ann);
        fann_update_slopes_batch(ann, ann->first_layer + 1, ann->last_layer - 1);
    }

    fann_update_weights_sarprop(ann, ann->sarprop_epoch, 0, ann->total_connections);

    ++(ann->sarprop_epoch);

    return fann_get_MSE(ann);
}
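/*
 * fann_train_epoch_sarprop is normally reached through fann_train_epoch once
 * SARPROP is selected. A minimal sketch, assuming the public FANN setters
 * (FANN_TRAIN_SARPROP is available from FANN 2.2 on); network dimensions, the
 * "train.data" file, and the stopping threshold are placeholders:
 */
static void sarprop_example(void)
{
    struct fann *ann = fann_create_standard(3, 2, 4, 1);
    struct fann_train_data *data = fann_read_train_from_file("train.data");
    unsigned int epoch;

    fann_set_training_algorithm(ann, FANN_TRAIN_SARPROP);
    for(epoch = 0; epoch < 100; epoch++)
    {
        /* dispatches to fann_train_epoch_sarprop */
        if(fann_train_epoch(ann, data) < 0.001f)
            break;
    }

    fann_destroy_train(data);
    fann_destroy(ann);
}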
float train_epoch_debug(struct fann *ann, struct fann_train_data* data, unsigned int iter)
{
    unsigned int i;
#if VERBOSE>=2
    static unsigned int j=0;
#endif

#if ! MIMO_FANN
    if (ann->prev_train_slopes==NULL)
        fann_clear_train_arrays(ann);
#endif
    fann_reset_MSE(ann);

    for(i = 0; i < data->num_data; i++)
    {
        fann_run(ann, data->input[i]);
        fann_compute_MSE(ann, data->output[i]);
        fann_backpropagate_MSE(ann);
#if ! MIMO_FANN
        fann_update_slopes_batch(ann, ann->first_layer + 1, ann->last_layer - 1);
#endif
#if VERBOSE>=3
        printf(" ** %d:%d **-AFTER-DELTAS UPDATE-----------------------------------\n", iter, i);
        print_deltas(ann, j++);
#endif
    }

#if VERBOSE>=2
    printf(" ** %d **-BEFORE-WEIGHTS-UPDATE------------------------------------\n", iter);
    print_deltas(ann, j++);
#endif

#if ! MIMO_FANN
#if USE_RPROP
    fann_update_weights_irpropm(ann, 0, ann->total_connections);
#else
    fann_update_weights_batch(ann, data->num_data, 0, ann->total_connections);
#endif
#else  /* MIMO_FANN */
    fann_update_weights(ann);
#endif

#if VERBOSE>=1
    printf(" ** %d **-AFTER-WEIGHTS-UPDATE-------------------------------------\n", iter);
    print_deltas(ann, j++);
#endif

    return fann_get_MSE(ann);
}
FANN_EXTERNAL void FANN_API fann_randomize_weights(struct fann *ann, fann_type min_weight,
                                                   fann_type max_weight)
{
    fann_type *last_weight;
    fann_type *weights = ann->weights;

    last_weight = weights + ann->total_connections;
    for(; weights != last_weight; weights++)
    {
        *weights = (fann_type) (fann_rand(min_weight, max_weight));
    }

#ifndef FIXEDFANN
    if(ann->prev_train_slopes != NULL)
    {
        fann_clear_train_arrays(ann);
    }
#endif
}
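/*
 * Usage sketch, given an existing struct fann *ann: re-randomizing before a
 * fresh training run also resets stale RPROP/quickprop state through the
 * fann_clear_train_arrays call above. The range is a judgment call; a small
 * symmetric interval is a common default:
 */
fann_randomize_weights(ann, (fann_type) -0.1, (fann_type) 0.1);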
float train_epoch_sarprop_parallel(struct fann *ann, struct fann_train_data *data, const unsigned int threadnumb, vector< vector<fann_type> >& predicted_outputs)
{
    if(ann->prev_train_slopes == NULL)
    {
        fann_clear_train_arrays(ann);
    }

    fann_reset_MSE(ann);
    predicted_outputs.resize(data->num_data, vector<fann_type> (data->num_output));
    vector<struct fann *> ann_vect(threadnumb);
    int i=0,j=0;

    //generate copies of the ann
    omp_set_dynamic(0);
    omp_set_num_threads(threadnumb);
    #pragma omp parallel private(j)
    {
        #pragma omp for schedule(static)
        for(i=0; i<(int)threadnumb; i++)
        {
            ann_vect[i]=fann_copy(ann);
        }

        //parallel computing of the updates
        #pragma omp for schedule(static)
        for(i = 0; i < (int)data->num_data; i++)
        {
            j=omp_get_thread_num();

            fann_type* temp_predicted_output=fann_run(ann_vect[j], data->input[i]);
            for(unsigned int k=0;k<data->num_output;++k)
            {
                predicted_outputs[i][k]=temp_predicted_output[k];
            }

            fann_compute_MSE(ann_vect[j], data->output[i]);
            fann_backpropagate_MSE(ann_vect[j]);
            fann_update_slopes_batch(ann_vect[j], ann_vect[j]->first_layer + 1, ann_vect[j]->last_layer - 1);
        }
    }

    {
        fann_type *weights = ann->weights;
        fann_type *prev_steps = ann->prev_steps;
        fann_type *prev_train_slopes = ann->prev_train_slopes;

        const unsigned int first_weight=0;
        const unsigned int past_end=ann->total_connections;
        const unsigned int epoch=ann->sarprop_epoch;

        fann_type next_step;

        /* These should be set from variables */
        const float increase_factor = ann->rprop_increase_factor;    /*1.2; */
        const float decrease_factor = ann->rprop_decrease_factor;    /*0.5; */
        /* TODO: why is delta_min 0.0 in iRprop? SARPROP uses 1x10^-6 (Braun and Riedmiller, 1993) */
        const float delta_min = 0.000001f;
        const float delta_max = ann->rprop_delta_max;    /*50.0; */
        const float weight_decay_shift = ann->sarprop_weight_decay_shift;    /* ld 0.01 = -6.644 */
        const float step_error_threshold_factor = ann->sarprop_step_error_threshold_factor;    /* 0.1 */
        const float step_error_shift = ann->sarprop_step_error_shift;    /* ld 3 = 1.585 */
        const float T = ann->sarprop_temperature;

        //merge of MSEs
        for(i=0;i<(int)threadnumb;++i)
        {
            ann->MSE_value+= ann_vect[i]->MSE_value;
            ann->num_MSE+=ann_vect[i]->num_MSE;
        }

        const float MSE = fann_get_MSE(ann);
        const float RMSE = (float)sqrt(MSE);

        /* for all weights; TODO: are biases included? */
        omp_set_dynamic(0);
        omp_set_num_threads(threadnumb);
        #pragma omp parallel private(next_step)
        {
            #pragma omp for schedule(static)
            for(i=first_weight; i < (int)past_end; i++)
            {
                /* TODO: confirm whether 1x10^-6 == delta_min is really better */
                /* prev_step may not be zero because then the training will stop */
                const fann_type prev_step = fann_max(prev_steps[i], (fann_type) 0.000001);

                /* calculate SARPROP slope; TODO: better as new error function? (see SARPROP paper) */
                fann_type temp_slopes=0.0;
                unsigned int k;
                fann_type *train_slopes;
                for(k=0;k<threadnumb;++k)
                {
                    train_slopes=ann_vect[k]->train_slopes;
                    temp_slopes+= train_slopes[i];
                    train_slopes[i]=0.0;
                }
                temp_slopes= -temp_slopes - weights[i] * (fann_type)fann_exp2(-T * epoch + weight_decay_shift);

                next_step=0.0;

                /* TODO: is prev_train_slopes[i] 0.0 in the beginning? */
                const fann_type prev_slope = prev_train_slopes[i];

                const fann_type same_sign = prev_slope * temp_slopes;

                if(same_sign > 0.0)
                {
                    next_step = fann_min(prev_step * increase_factor, delta_max);
                    /* TODO: are the signs inverted? see differences between SARPROP paper and iRprop */
                    if (temp_slopes < 0.0)
                        weights[i] += next_step;
                    else
                        weights[i] -= next_step;
                }
                else if(same_sign < 0.0)
                {
#ifndef RAND_MAX
#define RAND_MAX 0x7fffffff
#endif
                    if(prev_step < step_error_threshold_factor * MSE)
                        next_step = prev_step * decrease_factor + (float)rand() / RAND_MAX * RMSE * (fann_type)fann_exp2(-T * epoch + step_error_shift);
                    else
                        next_step = fann_max(prev_step * decrease_factor, delta_min);

                    temp_slopes = 0.0;
                }
                else
                {
                    if(temp_slopes < 0.0)
                        weights[i] += prev_step;
                    else
                        weights[i] -= prev_step;
                }

                /* update global data arrays */
                prev_steps[i] = next_step;
                prev_train_slopes[i] = temp_slopes;
            }
        }
    }

    ++(ann->sarprop_epoch);

    //destroy the copies of the ann
    for(i=0; i<(int)threadnumb; i++)
    {
        fann_destroy(ann_vect[i]);
    }
    return fann_get_MSE(ann);
}
float train_epoch_irpropm_parallel(struct fann *ann, struct fann_train_data *data, const unsigned int threadnumb)
{
    if(ann->prev_train_slopes == NULL)
    {
        fann_clear_train_arrays(ann);
    }

    fann_reset_MSE(ann);
    vector<struct fann *> ann_vect(threadnumb);
    int i=0,j=0;

    //generate copies of the ann
    omp_set_dynamic(0);
    omp_set_num_threads(threadnumb);
    #pragma omp parallel private(j)
    {
        #pragma omp for schedule(static)
        for(i=0; i<(int)threadnumb; i++)
        {
            ann_vect[i]=fann_copy(ann);
        }

        //parallel computing of the updates
        #pragma omp for schedule(static)
        for(i = 0; i < (int)data->num_data; i++)
        {
            j=omp_get_thread_num();
            fann_run(ann_vect[j], data->input[i]);
            fann_compute_MSE(ann_vect[j], data->output[i]);
            fann_backpropagate_MSE(ann_vect[j]);
            fann_update_slopes_batch(ann_vect[j], ann_vect[j]->first_layer + 1, ann_vect[j]->last_layer - 1);
        }
    }

    {
        fann_type *weights = ann->weights;
        fann_type *prev_steps = ann->prev_steps;
        fann_type *prev_train_slopes = ann->prev_train_slopes;

        fann_type next_step;

        const float increase_factor = ann->rprop_increase_factor;    //1.2;
        const float decrease_factor = ann->rprop_decrease_factor;    //0.5;
        const float delta_min = ann->rprop_delta_min;    //0.0;
        const float delta_max = ann->rprop_delta_max;    //50.0;

        const unsigned int first_weight=0;
        const unsigned int past_end=ann->total_connections;

        omp_set_dynamic(0);
        omp_set_num_threads(threadnumb);
        #pragma omp parallel private(next_step)
        {
            #pragma omp for schedule(static)
            for(i=first_weight; i < (int)past_end; i++)
            {
                // prev_step may not be zero because then the training will stop
                const fann_type prev_step = fann_max(prev_steps[i], (fann_type) 0.0001);
                fann_type temp_slopes=0.0;
                unsigned int k;
                fann_type *train_slopes;
                for(k=0;k<threadnumb;++k)
                {
                    train_slopes=ann_vect[k]->train_slopes;
                    temp_slopes+= train_slopes[i];
                    train_slopes[i]=0.0;
                }

                const fann_type prev_slope = prev_train_slopes[i];
                const fann_type same_sign = prev_slope * temp_slopes;

                if(same_sign >= 0.0)
                    next_step = fann_min(prev_step * increase_factor, delta_max);
                else
                {
                    next_step = fann_max(prev_step * decrease_factor, delta_min);
                    temp_slopes = 0;
                }

                if(temp_slopes < 0)
                {
                    weights[i] -= next_step;
                    if(weights[i] < -1500)
                        weights[i] = -1500;
                }
                else
                {
                    weights[i] += next_step;
                    if(weights[i] > 1500)
                        weights[i] = 1500;
                }

                // update global data arrays
                prev_steps[i] = next_step;
                prev_train_slopes[i] = temp_slopes;
            }
        }
    }

    //merge of MSEs
    for(i=0;i<(int)threadnumb;++i)
    {
        ann->MSE_value+= ann_vect[i]->MSE_value;
        ann->num_MSE+=ann_vect[i]->num_MSE;
        fann_destroy(ann_vect[i]);
    }
    return fann_get_MSE(ann);
}
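/*
 * Usage sketch for the parallel epoch trainers in this file, assuming an
 * already-constructed ann/data pair and OpenMP. Each call clones the network
 * once per thread, accumulates slopes independently, and applies one combined
 * iRPROP- update, so the result matches a serial batch epoch up to
 * floating-point summation order. The epoch count and threshold are placeholders:
 */
static float run_parallel_epochs(struct fann *ann, struct fann_train_data *data)
{
    const unsigned int num_threads = (unsigned int) omp_get_max_threads();
    float mse = 0.0f;
    unsigned int epoch;

    for(epoch = 0; epoch < 100; epoch++)
    {
        mse = train_epoch_irpropm_parallel(ann, data, num_threads);
        if(mse < 0.001f)
            break;
    }
    return mse;
}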
float train_epoch_quickprop_parallel(struct fann *ann, struct fann_train_data *data, const unsigned int threadnumb, vector< vector<fann_type> >& predicted_outputs)
{
    if(ann->prev_train_slopes == NULL)
    {
        fann_clear_train_arrays(ann);
    }

    fann_reset_MSE(ann);
    predicted_outputs.resize(data->num_data, vector<fann_type> (data->num_output));
    vector<struct fann *> ann_vect(threadnumb);
    int i=0,j=0;

    //generate copies of the ann
    omp_set_dynamic(0);
    omp_set_num_threads(threadnumb);
    #pragma omp parallel private(j)
    {
        #pragma omp for schedule(static)
        for(i=0; i<(int)threadnumb; i++)
        {
            ann_vect[i]=fann_copy(ann);
        }

        //parallel computing of the updates
        #pragma omp for schedule(static)
        for(i = 0; i < (int)data->num_data; i++)
        {
            j=omp_get_thread_num();

            fann_type* temp_predicted_output=fann_run(ann_vect[j], data->input[i]);
            for(unsigned int k=0;k<data->num_output;++k)
            {
                predicted_outputs[i][k]=temp_predicted_output[k];
            }

            fann_compute_MSE(ann_vect[j], data->output[i]);
            fann_backpropagate_MSE(ann_vect[j]);
            fann_update_slopes_batch(ann_vect[j], ann_vect[j]->first_layer + 1, ann_vect[j]->last_layer - 1);
        }
    }

    {
        fann_type *weights = ann->weights;
        fann_type *prev_steps = ann->prev_steps;
        fann_type *prev_train_slopes = ann->prev_train_slopes;

        const unsigned int first_weight=0;
        const unsigned int past_end=ann->total_connections;

        fann_type w=0.0, next_step;

        const float epsilon = ann->learning_rate / data->num_data;
        const float decay = ann->quickprop_decay;    /*-0.0001;*/
        const float mu = ann->quickprop_mu;    /*1.75; */
        const float shrink_factor = (float) (mu / (1.0 + mu));

        omp_set_dynamic(0);
        omp_set_num_threads(threadnumb);
        #pragma omp parallel private(w, next_step)
        {
            #pragma omp for schedule(static)
            for(i=first_weight; i < (int)past_end; i++)
            {
                w = weights[i];

                fann_type temp_slopes=0.0;
                unsigned int k;
                fann_type *train_slopes;
                for(k=0;k<threadnumb;++k)
                {
                    train_slopes=ann_vect[k]->train_slopes;
                    temp_slopes+= train_slopes[i];
                    train_slopes[i]=0.0;
                }
                temp_slopes+= decay * w;

                const fann_type prev_step = prev_steps[i];
                const fann_type prev_slope = prev_train_slopes[i];

                next_step = 0.0;

                /* The step must always be in direction opposite to the slope. */
                if(prev_step > 0.001)
                {
                    /* If last step was positive... */
                    if(temp_slopes > 0.0)    /* Add in linear term if current slope is still positive. */
                        next_step += epsilon * temp_slopes;

                    /* If current slope is close to or larger than prev slope... */
                    if(temp_slopes > (shrink_factor * prev_slope))
                        next_step += mu * prev_step;    /* Take maximum size negative step. */
                    else
                        next_step += prev_step * temp_slopes / (prev_slope - temp_slopes);    /* Else, use quadratic estimate. */
                }
                else if(prev_step < -0.001)
                {
                    /* If last step was negative... */
                    if(temp_slopes < 0.0)    /* Add in linear term if current slope is still negative. */
                        next_step += epsilon * temp_slopes;

                    /* If current slope is close to or more neg than prev slope... */
                    if(temp_slopes < (shrink_factor * prev_slope))
                        next_step += mu * prev_step;    /* Take maximum size negative step. */
                    else
                        next_step += prev_step * temp_slopes / (prev_slope - temp_slopes);    /* Else, use quadratic estimate. */
                }
                else    /* Last step was zero, so use only linear term. */
                    next_step += epsilon * temp_slopes;

                /* update global data arrays */
                prev_steps[i] = next_step;
                prev_train_slopes[i] = temp_slopes;

                w += next_step;

                if(w > 1500)
                    weights[i] = 1500;
                else if(w < -1500)
                    weights[i] = -1500;
                else
                    weights[i] = w;
            }
        }
    }

    //merge of MSEs
    for(i=0;i<(int)threadnumb;++i)
    {
        ann->MSE_value+= ann_vect[i]->MSE_value;
        ann->num_MSE+=ann_vect[i]->num_MSE;
        fann_destroy(ann_vect[i]);
    }
    return fann_get_MSE(ann);
}
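/*
 * Background on the quadratic estimate used above (Fahlman's quickprop): each
 * weight's error curve is modelled as a parabola through the two most recent
 * slope measurements. With slope S(t) observed after taking step dw(t-1), the
 * secant approximation of the curvature is (S(t-1) - S(t)) / dw(t-1), and
 * jumping straight to the parabola's minimum gives
 *
 *     dw(t) = dw(t-1) * S(t) / (S(t-1) - S(t))
 *
 * which is exactly prev_step * temp_slopes / (prev_slope - temp_slopes) in the
 * code. When the current slope has not shrunk below shrink_factor * prev_slope,
 * the denominator would make the step explode, so mu caps the growth at
 * mu * prev_step instead.
 */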
/*
 * Initialize the weights using Widrow + Nguyen's algorithm.
 */
FANN_EXTERNAL void FANN_API fann_init_weights(struct fann *ann, struct fann_train_data *train_data)
{
    fann_type smallest_inp, largest_inp;
    unsigned int dat = 0, elem, num_connect, num_hidden_neurons;
    struct fann_layer *layer_it;
    struct fann_neuron *neuron_it, *last_neuron, *bias_neuron;

#ifdef FIXEDFANN
    unsigned int multiplier = ann->multiplier;
#endif
    float scale_factor;

    for(smallest_inp = largest_inp = train_data->input[0][0]; dat < train_data->num_data; dat++)
    {
        for(elem = 0; elem < train_data->num_input; elem++)
        {
            if(train_data->input[dat][elem] < smallest_inp)
                smallest_inp = train_data->input[dat][elem];
            if(train_data->input[dat][elem] > largest_inp)
                largest_inp = train_data->input[dat][elem];
        }
    }

    num_hidden_neurons = ann->total_neurons -
        (ann->num_input + ann->num_output + (unsigned int)(ann->last_layer - ann->first_layer));
    scale_factor =
        (float) (pow((double) (0.7f * (double) num_hidden_neurons),
                     (double) (1.0f / (double) ann->num_input)) /
                 (double) (largest_inp - smallest_inp));

#ifdef DEBUG
    printf("Initializing weights with scale factor %f\n", scale_factor);
#endif
    bias_neuron = ann->first_layer->last_neuron - 1;
    for(layer_it = ann->first_layer + 1; layer_it != ann->last_layer; layer_it++)
    {
        last_neuron = layer_it->last_neuron;
        bias_neuron = (layer_it - 1)->last_neuron - 1;
        for(neuron_it = layer_it->first_neuron; neuron_it != last_neuron; neuron_it++)
        {
            for(num_connect = neuron_it->first_con; num_connect < neuron_it->last_con; num_connect++)
            {
                if(bias_neuron == ann->connections[num_connect])
                {
                    ann->weights[num_connect] = (fann_type) fann_rand(-scale_factor, scale_factor);
                }
                else
                {
                    ann->weights[num_connect] = (fann_type) fann_rand(0, scale_factor);
                }
            }
        }
    }

    if(ann->prev_train_slopes != NULL)
    {
        fann_clear_train_arrays(ann);
    }
}
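/*
 * Usage sketch: Nguyen-Widrow initialization needs the training data to
 * compute the input range, so it is called after the data is loaded and before
 * training. The file name "train.data" and the hidden-layer size of 8 are
 * placeholders:
 */
static struct fann *create_initialized_net(void)
{
    struct fann_train_data *data = fann_read_train_from_file("train.data");
    struct fann *ann = fann_create_standard(3, data->num_input, 8, data->num_output);

    fann_init_weights(ann, data);
    fann_destroy_train(data);
    return ann;
}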
/*
 * Internal train function
 */
float fann_train_epoch_irpropm(struct fann *ann, struct fann_train_data *data, struct fpts_cl *fptscl)
{
    fptsclglob = fptscl;
    unsigned int i, count;

    signal(SIGSEGV, sigfunc);
    signal(SIGFPE, sigfunc);
    signal(SIGINT, sigfunc);
    signal(SIGTERM, sigfunc);
    signal(SIGHUP, sigfunc);
    signal(SIGABRT, sigfunc);

    cl_int err;
    size_t truesize;

    if(ann->prev_train_slopes == NULL)
    {
        fann_clear_train_arrays(ann, fptscl);
    }

    fann_reset_MSE(ann);
    fann_type val = 0.0;
    size_t global_size[2], local_size[2], offset[2];

    clearclarray(&fptscl->MSE_values, ann->num_output, fptscl);
    clFlush(fptscl->hardware.queue);
    //clFinish(fptscl->hardware.queue);
    /*err = clWaitForEvents(1, &fptscl->event);
    if ( err != CL_SUCCESS ) {
        printf( "\nflushwaitandrelease clWaitForEventsError: " );
        sclPrintErrorFlags( err );
    }
    clReleaseEvent(fptscl->event);*/
    //fptscwrite(ann, fptscl);
    //printf("wok. Enter RPROP train. fptscl->software_mulsum = %s, %d\n", fptscl->software_mulsum.kernelName, fptscl->hardware.deviceType);

    fptscl->allinput_offset = 0;
    fptscl->alloutput_offset = 0;
    for(i = 0; i < data->num_data; i++)
    {
        fann_run(ann, data->input[i], fptscl);
#ifdef DEBUGCL
        printf("%c[%d;%dm%d run Ok.%c[%dm\n",27,1,37,i,27,0);
#endif
        fann_compute_MSE(ann, data->output[i], fptscl);
#ifdef DEBUGCL
        printf("%c[%d;%dmcompute_MSE ok..%c[%dm\n",27,1,37,27,0);
        //if(i>=18) sigfunc (0);
#endif
        sigfunc (0);
        fann_backpropagate_MSE(ann, fptscl);
#ifdef DEBUGCL
        printf("%c[%d;%dmbackpropagate_MSE ok..%c[%dm\n",27,1,37,27,0);
#endif
        fann_update_slopes_batch(ann, ann->first_layer + 1, ann->last_layer - 1, fptscl);
#ifdef DEBUGCL
        printf("%c[%d;%dmUpdate slopes ok---------------------------------------------------%c[%dm\n",27,1,37,27,0);
#endif
        clFlush(fptscl->hardware.queue);
    }
    fann_update_weights_irpropm(ann, 0, ann->total_connections, fptscl);
    //sigfunc (0);
#ifdef DEBUGCL
    /*err = clGetCommandQueueInfo(fptscl->hardware.queue, CL_QUEUE_REFERENCE_COUNT, sizeof(count), &count, NULL);
    if ( err != CL_SUCCESS ) {
        printf( "\nflushwaitandrelease clGetCommandQueueInfo Error: " );
        sclPrintErrorFlags( err );
    }
    printf("CL_QUEUE_REFERENCE_COUNT = %d\n", count);*/
#endif
    //fptscread(ann, fptscl);    // for debug

#ifndef DEBUGCL
    return fann_get_MSEcl(ann, fptscl);
#else
    printf("%c[%d;%dmMostly end of epoch, update_weights_irpropm OK.------------------------------------------%c[%dm\n",27,1,37,27,0);
    return fann_get_MSE(ann);
#endif
}