void testOneEpochTrain(fann *ann, fann_train_data *train, unsigned int trainingAlgorithm, const std::string &header)
{
    printf("TEST: One Epoch %20s ", header.c_str());
    bool passed = true;

    fann *gpunn = fann_copy(ann);
    fann *cpunn = fann_copy(ann);
    gpunn->training_algorithm = (fann_train_enum)trainingAlgorithm;
    cpunn->training_algorithm = (fann_train_enum)trainingAlgorithm;

    gpuann_fann_train_on_data(gpunn, train, 1);
    fann_train_epoch(cpunn, train);

    fann_type *cpuValuesArray = (fann_type *)malloc(cpunn->total_neurons * sizeof(fann_type));
    fann_type *gpuValuesArray = (fann_type *)malloc(cpunn->total_neurons * sizeof(fann_type));

    /* snapshot the neuron values of the CPU network */
    fann *tmpnn = cpunn;
    struct fann_neuron *last_neuron = (tmpnn->last_layer - 1)->last_neuron;
    fann_neuron *neuronsArray = tmpnn->first_layer->first_neuron;
    struct fann_neuron *neuron_it = tmpnn->first_layer->first_neuron;

    for(; neuron_it != last_neuron; neuron_it++)
    {
        unsigned int currentNeuronShift = neuron_it - neuronsArray;
        cpuValuesArray[currentNeuronShift] = neuron_it->value;
    }

    /* snapshot the neuron values of the GPU network */
    tmpnn = gpunn;
    last_neuron = (tmpnn->last_layer - 1)->last_neuron;
    neuronsArray = tmpnn->first_layer->first_neuron;
    neuron_it = tmpnn->first_layer->first_neuron;

    for(; neuron_it != last_neuron; neuron_it++)
    {
        unsigned int currentNeuronShift = neuron_it - neuronsArray;
        gpuValuesArray[currentNeuronShift] = neuron_it->value;
    }

    passed &= isAlmostSameArrays(gpuValuesArray, cpuValuesArray, cpunn->total_neurons, true, "VALUES:");
    passed &= isAlmostSameArrays(gpunn->weights, cpunn->weights, cpunn->total_connections, true, "WEIGHTS:");

    /* release the snapshots and the network copies */
    free(cpuValuesArray);
    free(gpuValuesArray);
    fann_destroy(gpunn);
    fann_destroy(cpunn);

    if(passed)
        printf("PASSED\n");
    else
        printf("FAILED\n");
}
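A minimal driver for tests like the one above might look as follows; the 2-3-1 topology, the "xor.data" file name, and the algorithm list are illustrative assumptions, not part of the original suite.

#include <fann.h>
#include <string>

int main()
{
    /* hypothetical setup: a small 2-3-1 network and an XOR-style data file */
    fann *ann = fann_create_standard(3, 2, 3, 1);
    fann_train_data *train = fann_read_train_from_file("xor.data");

    testOneEpochTrain(ann, train, FANN_TRAIN_INCREMENTAL, "INCREMENTAL");
    testOneEpochTrain(ann, train, FANN_TRAIN_BATCH,       "BATCH");
    testOneEpochTrain(ann, train, FANN_TRAIN_RPROP,       "RPROP");

    fann_destroy_train(train);
    fann_destroy(ann);
    return 0;
}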
// fann_copy: creates a copy of a fann structure.
int sci_fann_copy(char *fname)
{
    int res;
    struct fann *result_ann = NULL;
    struct fann *result_ann_copy = NULL;

    /* require exactly one input and one output argument */
    if((Rhs != 1) || (Lhs != 1))
    {
        Scierror(999, "%s usage: ann_out = %s(ann_in)", fname, fname);
        return 0;
    }

    // Get the ann
    res = detect_fannlist(1);
    if(res == -1) return 0;

    result_ann = createCFannStructFromScilabFannStruct(1, &res);
    if(res == -1) return 0;

    if(result_ann == NULL)
    {
        Scierror(999, "%s: Problem while creating the fann scilab structure\n", fname);
        return 0;
    }

    result_ann_copy = fann_copy(result_ann);

    res = createScilabFannStructFromCFannStruct(result_ann_copy, Rhs + 1);
    if(res == -1) return 0;

    LhsVar(1) = Rhs + 1;
    return 0;
}
void trainMethodsSpeedTestGPU(fann *ann, fann_train_data *train, unsigned int trainingAlgorithm, unsigned int epochCount)
{
    fann *gpunn = fann_copy(ann);
    gpunn->training_algorithm = (fann_train_enum)trainingAlgorithm;

    {
        cudaEvent_t start, stop;
        float time;
        cudaEventCreate(&start);
        cudaEventCreate(&stop);
        cudaEventRecord(start, 0);

        gpuann_fann_parallel_train_on_data(gpunn, train, epochCount);

        cudaEventRecord(stop, 0);
        cudaEventSynchronize(stop);
        cudaEventElapsedTime(&time, start, stop);   /* elapsed time in milliseconds */
        cudaEventDestroy(start);
        cudaEventDestroy(stop);
        printf("%10.5f ", time);
    }

    fann_destroy(gpunn);
}
Classifier::Classifier(const Classifier &o)
{
    numHidden = o.numHidden;
    maxEpochs = o.maxEpochs;
    reqError = o.reqError;
    printEpochs = o.printEpochs;
    neuralNetwork = NULL;   /* stays null when the source has no network */
    if(o.neuralNetwork)
    {
        neuralNetwork = fann_copy(o.neuralNetwork);
    }
}
/* Build the best possible .net file from training: the network is periodically
   evaluated on a test set during training, and the best snapshot so far is
   saved, for the chosen number of hidden neurons. */
void train(struct fann *ann, char *trainFile, char *testFile, char *netFile,
           unsigned int max_epochs, unsigned int epochs_between_reports,
           float desired_error, const unsigned int num_neurons_hidden)
{
    struct fann_train_data *trainData, *testData;
    struct fann *annBest = fann_copy(ann);
    float error;
    unsigned int i;
    char buffer[1024];
    float testError = 1;
    float testErrorBest = 1;

    trainData = fann_read_train_from_file(trainFile);
    testData = fann_read_train_from_file(testFile);

    for(i = 1; i <= max_epochs; i++)
    {
        fann_shuffle_train_data(trainData);         /* shuffle the data */
        error = fann_train_epoch(ann, trainData);   /* one epoch; error is the training MSE */

        /* every epochs_between_reports epochs, or once the target error is reached */
        if(i % epochs_between_reports == 0 || error < desired_error)
        {
            fann_test_data(ann, testData);          /* evaluate the network on the test data */
            testError = fann_get_MSE(ann);
            if(testError < testErrorBest)
            {
                testErrorBest = testError;
                fann_destroy(annBest);              /* drop the previous snapshot before copying */
                annBest = fann_copy(ann);
                printf("Epochs %8d; trainError : %f; testError : %f;\n", i, error, testError);
                sprintf(buffer, "%s_%u_%d.net", netFile, num_neurons_hidden, i);
                fann_save(annBest, buffer);
            }
        }
        if(error < desired_error)
            break;
    }

    sprintf(buffer, "%s_%u.net", netFile, num_neurons_hidden);
    fann_save(annBest, buffer);
    fann_destroy(annBest);
    fann_destroy_train(trainData);
    fann_destroy_train(testData);
}
Classifier &Classifier::operator=(const Classifier &o)
{
    if(this != &o)  /* guard against self-assignment */
    {
        numHidden = o.numHidden;
        maxEpochs = o.maxEpochs;
        reqError = o.reqError;
        printEpochs = o.printEpochs;
        if(neuralNetwork)
        {
            fann_destroy(neuralNetwork);
        }
        neuralNetwork = o.neuralNetwork ? fann_copy(o.neuralNetwork) : NULL;
    }
    return *this;
}
bool runTest(struct fann *ann, fann_type *input)
{
    fann_type *calc_out_c;
    fann_type *calc_out_g;
    fann_type calc_out_gpu, calc_out_cpu;

    fann *gpunn = fann_copy(ann);
    fann *cpunn = fann_copy(ann);

    calc_out_g = gpuann_fann_run(gpunn, input);
    calc_out_c = fann_run(cpunn, input);

    calc_out_gpu = calc_out_g[0];
    calc_out_cpu = calc_out_c[0];

    /* squared difference of the first output must stay below the tolerance */
    bool success = (calc_out_cpu - calc_out_gpu) * (calc_out_cpu - calc_out_gpu) < 0.001;

    fann_destroy(cpunn);
    fann_destroy(gpunn);
    return success;
}
void testOneEpochParallelTrain(fann *ann, fann_train_data *train, unsigned int trainingAlgorithm, const std::string &header)
{
    printf("TEST: One Epoch Parallel %20s ", header.c_str());
    bool passed = true;

    fann *gpunn = fann_copy(ann);
    fann *cpunn = fann_copy(ann);
    gpunn->training_algorithm = (fann_train_enum)trainingAlgorithm;
    cpunn->training_algorithm = (fann_train_enum)trainingAlgorithm;

    gpuann_fann_parallel_train_on_data(gpunn, train, 1);
    fann_train_epoch(cpunn, train);

    passed &= isAlmostSameArrays(gpunn->weights, cpunn->weights, cpunn->total_connections, true, "WEIGHTS:");

    fann_destroy(gpunn);
    fann_destroy(cpunn);

    if(passed)
        printf("PASSED\n");
    else
        printf("FAILED\n");
}
void trainMethodsSpeedTestCPU(fann *ann, fann_train_data *train, unsigned int trainingAlgorithm, unsigned int epochCount)
{
    fann *cpunn = fann_copy(ann);
    cpunn->training_algorithm = (fann_train_enum)trainingAlgorithm;

    {
        clock_t start = clock();
        for(unsigned int i = 0; i < epochCount; ++i)
            fann_train_epoch(cpunn, train);
        clock_t ends = clock();
        printf("%10.5f ", (double)(ends - start) / CLOCKS_PER_SEC * 1000.);  /* milliseconds */
    }

    fann_destroy(cpunn);
}
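The CPU and GPU timers above each print one column, so a benchmark typically calls them in pairs, one row per algorithm. A sketch, assuming the standard FANN training enums and an arbitrary epoch count:

/* Hypothetical benchmark loop; the enum list and the epoch count of 100 are
 * assumptions for illustration. */
void speedTestAll(fann *ann, fann_train_data *train)
{
    const fann_train_enum algos[] = { FANN_TRAIN_INCREMENTAL, FANN_TRAIN_BATCH,
                                      FANN_TRAIN_RPROP, FANN_TRAIN_QUICKPROP };
    for(unsigned int a = 0; a < sizeof(algos) / sizeof(algos[0]); ++a)
    {
        trainMethodsSpeedTestCPU(ann, train, algos[a], 100);  /* prints CPU ms */
        trainMethodsSpeedTestGPU(ann, train, algos[a], 100);  /* prints GPU ms */
        printf("\n");
    }
}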
float test_data_parallel(struct fann *ann, struct fann_train_data *data, const unsigned int threadnumb, vector< vector<fann_type> >& predicted_outputs)
{
    if(fann_check_input_output_sizes(ann, data) == -1)
        return 0;

    predicted_outputs.resize(data->num_data, vector<fann_type>(data->num_output));
    fann_reset_MSE(ann);

    vector<struct fann *> ann_vect(threadnumb);
    int i = 0, j = 0;

    //generate copies of the ann
    omp_set_dynamic(0);
    omp_set_num_threads(threadnumb);
    #pragma omp parallel private(j)
    {
        #pragma omp for schedule(static)
        for(i = 0; i < (int)threadnumb; i++)
        {
            ann_vect[i] = fann_copy(ann);
        }

        //parallel computing of the updates
        #pragma omp for schedule(static)
        for(i = 0; i < (int)data->num_data; ++i)
        {
            j = omp_get_thread_num();
            fann_type *temp_predicted_output = fann_test(ann_vect[j], data->input[i], data->output[i]);
            for(unsigned int k = 0; k < data->num_output; ++k)
            {
                predicted_outputs[i][k] = temp_predicted_output[k];
            }
        }
    }

    //merge of MSEs
    for(i = 0; i < (int)threadnumb; ++i)
    {
        ann->MSE_value += ann_vect[i]->MSE_value;
        ann->num_MSE += ann_vect[i]->num_MSE;
        fann_destroy(ann_vect[i]);
    }
    return fann_get_MSE(ann);
}
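Calling the parallel tester is straightforward; a sketch, assuming four threads and an already loaded network and data set:

// Hypothetical call site; ann, data, and the thread count are assumptions.
std::vector< std::vector<fann_type> > predictions;
float mse = test_data_parallel(ann, data, 4, predictions);
printf("test MSE: %f (first sample, first output: %f)\n", mse, predictions[0][0]);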
ViFann::ViFann(const ViFann &other)
{
#ifdef GPU
    LOG("The GPU version of FANN currently doesn't allow the copy of ANNs.", QtFatalMsg);
    exit(-1);
#else
    mNetwork = NULL;
    mTrain = NULL;
    mInput = NULL;
    mOutput = NULL;
    clear();
    enableGpu(other.mGpu);

    if(other.mNetwork != NULL) mNetwork = fann_copy(other.mNetwork);
    if(other.mTrain != NULL) mTrain = new ViFannTrain(*other.mTrain);
    if(other.mInput != NULL) mInput = new float[other.mInputCount];
    if(other.mOutput != NULL) mOutput = new float[other.mOutputCount];

    // Structure
    mType = other.mType;
    mInputCount = other.mInputCount;
    mOutputCount = other.mOutputCount;
    mNeurons = other.mNeurons;
    mConnectionRate = other.mConnectionRate;

    // Weights
    mWeights = other.mWeights;
    mWeightsMinimum = other.mWeightsMinimum;
    mWeightsMaximum = other.mWeightsMaximum;

    // Training
    mTraining = other.mTraining;
    mAlgorithm = other.mAlgorithm;
    mTrainEpochs = other.mTrainEpochs;
    mTrainNeurons = other.mTrainNeurons;
    mTrainMse = other.mTrainMse;
    mTrainStagnationFraction = other.mTrainStagnationFraction;
    mTrainStagnationIterations = other.mTrainStagnationIterations;
#endif
}
float train_epoch_sarprop_parallel(struct fann *ann, struct fann_train_data *data, const unsigned int threadnumb, vector< vector<fann_type> >& predicted_outputs)
{
    if(ann->prev_train_slopes == NULL)
    {
        fann_clear_train_arrays(ann);
    }

    fann_reset_MSE(ann);
    predicted_outputs.resize(data->num_data, vector<fann_type>(data->num_output));

    vector<struct fann *> ann_vect(threadnumb);
    int i = 0, j = 0;

    //generate copies of the ann
    omp_set_dynamic(0);
    omp_set_num_threads(threadnumb);
    #pragma omp parallel private(j)
    {
        #pragma omp for schedule(static)
        for(i = 0; i < (int)threadnumb; i++)
        {
            ann_vect[i] = fann_copy(ann);
        }

        //parallel computing of the updates
        #pragma omp for schedule(static)
        for(i = 0; i < (int)data->num_data; i++)
        {
            j = omp_get_thread_num();
            fann_type *temp_predicted_output = fann_run(ann_vect[j], data->input[i]);
            for(unsigned int k = 0; k < data->num_output; ++k)
            {
                predicted_outputs[i][k] = temp_predicted_output[k];
            }
            fann_compute_MSE(ann_vect[j], data->output[i]);
            fann_backpropagate_MSE(ann_vect[j]);
            fann_update_slopes_batch(ann_vect[j], ann_vect[j]->first_layer + 1, ann_vect[j]->last_layer - 1);
        }
    }

    {
        fann_type *weights = ann->weights;
        fann_type *prev_steps = ann->prev_steps;
        fann_type *prev_train_slopes = ann->prev_train_slopes;

        const unsigned int first_weight = 0;
        const unsigned int past_end = ann->total_connections;
        const unsigned int epoch = ann->sarprop_epoch;

        fann_type next_step;

        /* These should be set from variables */
        const float increase_factor = ann->rprop_increase_factor;  /* 1.2 */
        const float decrease_factor = ann->rprop_decrease_factor;  /* 0.5 */
        /* TODO: why is delta_min 0.0 in iRprop? SARPROP uses 1x10^-6 (Braun and Riedmiller, 1993) */
        const float delta_min = 0.000001f;
        const float delta_max = ann->rprop_delta_max;  /* 50.0 */
        const float weight_decay_shift = ann->sarprop_weight_decay_shift;  /* ld 0.01 = -6.644 */
        const float step_error_threshold_factor = ann->sarprop_step_error_threshold_factor;  /* 0.1 */
        const float step_error_shift = ann->sarprop_step_error_shift;  /* ld 3 = 1.585 */
        const float T = ann->sarprop_temperature;

        //merge of MSEs
        for(i = 0; i < (int)threadnumb; ++i)
        {
            ann->MSE_value += ann_vect[i]->MSE_value;
            ann->num_MSE += ann_vect[i]->num_MSE;
        }

        const float MSE = fann_get_MSE(ann);
        const float RMSE = (float)sqrt(MSE);

        /* for all weights; TODO: are biases included? */
        omp_set_dynamic(0);
        omp_set_num_threads(threadnumb);
        #pragma omp parallel private(next_step)
        {
            #pragma omp for schedule(static)
            for(i = first_weight; i < (int)past_end; i++)
            {
                /* TODO: confirm whether 1x10^-6 == delta_min is really better */
                /* prev_step may not be zero, because then the training will stop */
                const fann_type prev_step = fann_max(prev_steps[i], (fann_type)0.000001);

                /* calculate SARPROP slope; TODO: better as new error function? (see SARPROP paper) */
                fann_type temp_slopes = 0.0;
                unsigned int k;
                fann_type *train_slopes;
                for(k = 0; k < threadnumb; ++k)
                {
                    train_slopes = ann_vect[k]->train_slopes;
                    temp_slopes += train_slopes[i];
                    train_slopes[i] = 0.0;
                }
                temp_slopes = -temp_slopes - weights[i] * (fann_type)fann_exp2(-T * epoch + weight_decay_shift);

                next_step = 0.0;

                /* TODO: is prev_train_slopes[i] 0.0 in the beginning? */
                const fann_type prev_slope = prev_train_slopes[i];
                const fann_type same_sign = prev_slope * temp_slopes;

                if(same_sign > 0.0)
                {
                    next_step = fann_min(prev_step * increase_factor, delta_max);
                    /* TODO: are the signs inverted? see differences between SARPROP paper and iRprop */
                    if(temp_slopes < 0.0)
                        weights[i] += next_step;
                    else
                        weights[i] -= next_step;
                }
                else if(same_sign < 0.0)
                {
#ifndef RAND_MAX
#define RAND_MAX 0x7fffffff
#endif
                    if(prev_step < step_error_threshold_factor * MSE)
                        next_step = prev_step * decrease_factor + (float)rand() / RAND_MAX * RMSE * (fann_type)fann_exp2(-T * epoch + step_error_shift);
                    else
                        next_step = fann_max(prev_step * decrease_factor, delta_min);

                    temp_slopes = 0.0;
                }
                else
                {
                    if(temp_slopes < 0.0)
                        weights[i] += prev_step;
                    else
                        weights[i] -= prev_step;
                }

                /* update global data arrays */
                prev_steps[i] = next_step;
                prev_train_slopes[i] = temp_slopes;
            }
        }
    }

    ++(ann->sarprop_epoch);

    //destroy the copies of the ann
    for(i = 0; i < (int)threadnumb; i++)
    {
        fann_destroy(ann_vect[i]);
    }
    return fann_get_MSE(ann);
}
float train_epoch_irpropm_parallel(struct fann *ann, struct fann_train_data *data, const unsigned int threadnumb)
{
    if(ann->prev_train_slopes == NULL)
    {
        fann_clear_train_arrays(ann);
    }

    fann_reset_MSE(ann);

    vector<struct fann *> ann_vect(threadnumb);
    int i = 0, j = 0;

    //generate copies of the ann
    omp_set_dynamic(0);
    omp_set_num_threads(threadnumb);
    #pragma omp parallel private(j)
    {
        #pragma omp for schedule(static)
        for(i = 0; i < (int)threadnumb; i++)
        {
            ann_vect[i] = fann_copy(ann);
        }

        //parallel computing of the updates
        #pragma omp for schedule(static)
        for(i = 0; i < (int)data->num_data; i++)
        {
            j = omp_get_thread_num();
            fann_run(ann_vect[j], data->input[i]);
            fann_compute_MSE(ann_vect[j], data->output[i]);
            fann_backpropagate_MSE(ann_vect[j]);
            fann_update_slopes_batch(ann_vect[j], ann_vect[j]->first_layer + 1, ann_vect[j]->last_layer - 1);
        }
    }

    {
        fann_type *weights = ann->weights;
        fann_type *prev_steps = ann->prev_steps;
        fann_type *prev_train_slopes = ann->prev_train_slopes;

        fann_type next_step;

        const float increase_factor = ann->rprop_increase_factor;  //1.2;
        const float decrease_factor = ann->rprop_decrease_factor;  //0.5;
        const float delta_min = ann->rprop_delta_min;              //0.0;
        const float delta_max = ann->rprop_delta_max;              //50.0;

        const unsigned int first_weight = 0;
        const unsigned int past_end = ann->total_connections;

        omp_set_dynamic(0);
        omp_set_num_threads(threadnumb);
        #pragma omp parallel private(next_step)
        {
            #pragma omp for schedule(static)
            for(i = first_weight; i < (int)past_end; i++)
            {
                // prev_step may not be zero, because then the training will stop
                const fann_type prev_step = fann_max(prev_steps[i], (fann_type)0.0001);

                // accumulate the slopes of all thread-local copies, then reset them
                fann_type temp_slopes = 0.0;
                unsigned int k;
                fann_type *train_slopes;
                for(k = 0; k < threadnumb; ++k)
                {
                    train_slopes = ann_vect[k]->train_slopes;
                    temp_slopes += train_slopes[i];
                    train_slopes[i] = 0.0;
                }

                const fann_type prev_slope = prev_train_slopes[i];
                const fann_type same_sign = prev_slope * temp_slopes;

                if(same_sign >= 0.0)
                    next_step = fann_min(prev_step * increase_factor, delta_max);
                else
                {
                    next_step = fann_max(prev_step * decrease_factor, delta_min);
                    temp_slopes = 0;
                }

                if(temp_slopes < 0)
                {
                    weights[i] -= next_step;
                    if(weights[i] < -1500)
                        weights[i] = -1500;
                }
                else
                {
                    weights[i] += next_step;
                    if(weights[i] > 1500)
                        weights[i] = 1500;
                }

                // update global data arrays
                prev_steps[i] = next_step;
                prev_train_slopes[i] = temp_slopes;
            }
        }
    }

    //merge of MSEs
    for(i = 0; i < (int)threadnumb; ++i)
    {
        ann->MSE_value += ann_vect[i]->MSE_value;
        ann->num_MSE += ann_vect[i]->num_MSE;
        fann_destroy(ann_vect[i]);
    }
    return fann_get_MSE(ann);
}
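Because the function returns the merged MSE, it slots directly into a conventional epoch loop. A sketch, assuming a target error of 0.001 and the OpenMP default thread count:

// Hypothetical training loop; the threshold and epoch cap are assumptions.
float mse = 1.0f;
for(unsigned int epoch = 0; epoch < 1000 && mse > 0.001f; ++epoch)
{
    mse = train_epoch_irpropm_parallel(ann, data, omp_get_max_threads());
    printf("epoch %u: MSE %f\n", epoch, mse);
}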
float train_epoch_quickprop_parallel(struct fann *ann, struct fann_train_data *data, const unsigned int threadnumb, vector< vector<fann_type> >& predicted_outputs)
{
    if(ann->prev_train_slopes == NULL)
    {
        fann_clear_train_arrays(ann);
    }

    fann_reset_MSE(ann);
    predicted_outputs.resize(data->num_data, vector<fann_type>(data->num_output));

    vector<struct fann *> ann_vect(threadnumb);
    int i = 0, j = 0;

    //generate copies of the ann
    omp_set_dynamic(0);
    omp_set_num_threads(threadnumb);
    #pragma omp parallel private(j)
    {
        #pragma omp for schedule(static)
        for(i = 0; i < (int)threadnumb; i++)
        {
            ann_vect[i] = fann_copy(ann);
        }

        //parallel computing of the updates
        #pragma omp for schedule(static)
        for(i = 0; i < (int)data->num_data; i++)
        {
            j = omp_get_thread_num();
            fann_type *temp_predicted_output = fann_run(ann_vect[j], data->input[i]);
            for(unsigned int k = 0; k < data->num_output; ++k)
            {
                predicted_outputs[i][k] = temp_predicted_output[k];
            }
            fann_compute_MSE(ann_vect[j], data->output[i]);
            fann_backpropagate_MSE(ann_vect[j]);
            fann_update_slopes_batch(ann_vect[j], ann_vect[j]->first_layer + 1, ann_vect[j]->last_layer - 1);
        }
    }

    {
        fann_type *weights = ann->weights;
        fann_type *prev_steps = ann->prev_steps;
        fann_type *prev_train_slopes = ann->prev_train_slopes;

        const unsigned int first_weight = 0;
        const unsigned int past_end = ann->total_connections;

        fann_type w = 0.0, next_step;

        const float epsilon = ann->learning_rate / data->num_data;
        const float decay = ann->quickprop_decay;  /* -0.0001 */
        const float mu = ann->quickprop_mu;        /* 1.75 */
        const float shrink_factor = (float)(mu / (1.0 + mu));

        omp_set_dynamic(0);
        omp_set_num_threads(threadnumb);
        #pragma omp parallel private(w, next_step)
        {
            #pragma omp for schedule(static)
            for(i = first_weight; i < (int)past_end; i++)
            {
                w = weights[i];

                // accumulate the slopes of all thread-local copies, then reset them
                fann_type temp_slopes = 0.0;
                unsigned int k;
                fann_type *train_slopes;
                for(k = 0; k < threadnumb; ++k)
                {
                    train_slopes = ann_vect[k]->train_slopes;
                    temp_slopes += train_slopes[i];
                    train_slopes[i] = 0.0;
                }
                temp_slopes += decay * w;

                const fann_type prev_step = prev_steps[i];
                const fann_type prev_slope = prev_train_slopes[i];

                next_step = 0.0;

                /* The step must always be in direction opposite to the slope. */
                if(prev_step > 0.001)
                {
                    /* If last step was positive... */
                    if(temp_slopes > 0.0)  /* add in linear term if current slope is still positive */
                        next_step += epsilon * temp_slopes;

                    /* If current slope is close to or larger than prev slope... */
                    if(temp_slopes > (shrink_factor * prev_slope))
                        next_step += mu * prev_step;  /* take maximum size negative step */
                    else
                        next_step += prev_step * temp_slopes / (prev_slope - temp_slopes);  /* else, use quadratic estimate */
                }
                else if(prev_step < -0.001)
                {
                    /* If last step was negative... */
                    if(temp_slopes < 0.0)  /* add in linear term if current slope is still negative */
                        next_step += epsilon * temp_slopes;

                    /* If current slope is close to or more negative than prev slope... */
                    if(temp_slopes < (shrink_factor * prev_slope))
                        next_step += mu * prev_step;  /* take maximum size negative step */
                    else
                        next_step += prev_step * temp_slopes / (prev_slope - temp_slopes);  /* else, use quadratic estimate */
                }
                else
                {
                    /* Last step was zero, so use only linear term. */
                    next_step += epsilon * temp_slopes;
                }

                /* update global data arrays */
                prev_steps[i] = next_step;
                prev_train_slopes[i] = temp_slopes;

                w += next_step;
                if(w > 1500)
                    weights[i] = 1500;
                else if(w < -1500)
                    weights[i] = -1500;
                else
                    weights[i] = w;
            }
        }
    }

    //merge of MSEs
    for(i = 0; i < (int)threadnumb; ++i)
    {
        ann->MSE_value += ann_vect[i]->MSE_value;
        ann->num_MSE += ann_vect[i]->num_MSE;
        fann_destroy(ann_vect[i]);
    }
    return fann_get_MSE(ann);
}
float train_epoch_batch_parallel(struct fann *ann, struct fann_train_data *data, const unsigned int threadnumb, vector< vector<fann_type> >& predicted_outputs)
{
    fann_reset_MSE(ann);
    predicted_outputs.resize(data->num_data, vector<fann_type>(data->num_output));

    vector<struct fann *> ann_vect(threadnumb);
    int i = 0, j = 0;

    //generate copies of the ann
    omp_set_dynamic(0);
    omp_set_num_threads(threadnumb);
    #pragma omp parallel private(j)
    {
        #pragma omp for schedule(static)
        for(i = 0; i < (int)threadnumb; i++)
        {
            ann_vect[i] = fann_copy(ann);
        }

        //parallel computing of the updates
        #pragma omp for schedule(static)
        for(i = 0; i < (int)data->num_data; i++)
        {
            j = omp_get_thread_num();
            fann_type *temp_predicted_output = fann_run(ann_vect[j], data->input[i]);
            for(unsigned int k = 0; k < data->num_output; ++k)
            {
                predicted_outputs[i][k] = temp_predicted_output[k];
            }
            fann_compute_MSE(ann_vect[j], data->output[i]);
            fann_backpropagate_MSE(ann_vect[j]);
            fann_update_slopes_batch(ann_vect[j], ann_vect[j]->first_layer + 1, ann_vect[j]->last_layer - 1);
        }
    }

    //parallel update of the weights
    {
        const unsigned int num_data = data->num_data;
        const unsigned int first_weight = 0;
        const unsigned int past_end = ann->total_connections;
        fann_type *weights = ann->weights;
        const fann_type epsilon = ann->learning_rate / num_data;

        omp_set_dynamic(0);
        omp_set_num_threads(threadnumb);
        #pragma omp parallel
        {
            #pragma omp for schedule(static)
            for(i = first_weight; i < (int)past_end; i++)
            {
                // accumulate the slopes of all thread-local copies, then reset them
                fann_type temp_slopes = 0.0;
                unsigned int k;
                fann_type *train_slopes;
                for(k = 0; k < threadnumb; ++k)
                {
                    train_slopes = ann_vect[k]->train_slopes;
                    temp_slopes += train_slopes[i];
                    train_slopes[i] = 0.0;
                }
                weights[i] += temp_slopes * epsilon;
            }
        }
    }

    //merge of MSEs
    for(i = 0; i < (int)threadnumb; ++i)
    {
        ann->MSE_value += ann_vect[i]->MSE_value;
        ann->num_MSE += ann_vect[i]->num_MSE;
        fann_destroy(ann_vect[i]);
    }
    return fann_get_MSE(ann);
}
FANN_EXTERNAL float FANN_API fann_train_epoch_batch_parallel(struct fann *ann, struct fann_train_data *data, const unsigned int threadnumb)
{
    struct fann **ann_vect = (struct fann **)malloc(threadnumb * sizeof(struct fann *));
    int i = 0, j = 0;

    fann_reset_MSE(ann);

    //generate copies of the ann
    omp_set_dynamic(0);
    omp_set_num_threads(threadnumb);
    #pragma omp parallel private(j)
    {
        #pragma omp for schedule(static)
        for(i = 0; i < (int)threadnumb; i++)
        {
            ann_vect[i] = fann_copy(ann);
        }

        //parallel computing of the updates
        #pragma omp for schedule(static)
        for(i = 0; i < (int)data->num_data; i++)
        {
            j = omp_get_thread_num();
            if(ann->do_dropout)
            {
                fann_run_dropout(ann_vect[j], data->input[i]);
            }
            else
            {
                fann_run(ann_vect[j], data->input[i]);
            }
            fann_compute_MSE(ann_vect[j], data->output[i]);
            fann_backpropagate_MSE(ann_vect[j]);
            fann_update_slopes_batch(ann_vect[j], ann_vect[j]->first_layer + 1, ann_vect[j]->last_layer - 1);
        }
    }

    //parallel update of the weights
    {
        const unsigned int num_data = data->num_data;
        const unsigned int first_weight = 0;
        const unsigned int past_end = ann->total_connections;
        fann_type *weights = ann->weights;
        const fann_type epsilon = ann->learning_rate / num_data;

        omp_set_dynamic(0);
        omp_set_num_threads(threadnumb);
        #pragma omp parallel
        {
            #pragma omp for schedule(static)
            for(i = first_weight; i < (int)past_end; i++)
            {
                // accumulate the slopes of all thread-local copies, then reset them
                fann_type temp_slopes = 0.0;
                unsigned int k;
                fann_type *train_slopes;
                for(k = 0; k < threadnumb; ++k)
                {
                    train_slopes = ann_vect[k]->train_slopes;
                    temp_slopes += train_slopes[i];
                    train_slopes[i] = 0.0;
                }
                weights[i] += temp_slopes * epsilon;
            }
        }
    }

    //merge of MSEs
    for(i = 0; i < (int)threadnumb; ++i)
    {
        ann->MSE_value += ann_vect[i]->MSE_value;
        ann->num_MSE += ann_vect[i]->num_MSE;
        fann_destroy(ann_vect[i]);
    }
    free(ann_vect);
    return fann_get_MSE(ann);
}
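As with the other *_parallel variants, this epoch function can stand in for fann_train_epoch when OpenMP is available. A sketch, assuming eight threads and a caller-chosen stopping error; max_epochs and desired_error are illustrative names, not part of the original code:

/* Hypothetical drop-in usage; threadnumb, max_epochs, and desired_error are assumptions. */
for(unsigned int epoch = 0; epoch < max_epochs; ++epoch)
{
    float mse = fann_train_epoch_batch_parallel(ann, data, 8);
    if(mse < desired_error)
        break;
}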
int main(int argc, char **argv)
{
    srand(time(NULL));

    if(argc <= 1)
    {
        // printf("neuro num\r\n");
        // exit(0);
    }
    if(argc > 2)
    {
        //desired_error = atof(argv[2]);
        numn = atoi(argv[1]);
        l1n = atoi(argv[2]);
        if(argc > 3) l2n = atoi(argv[3]);
        if(argc > 4) l3n = atoi(argv[4]);
        if(argc > 5) l4n = atoi(argv[5]);
        if(argc > 6) l5n = atoi(argv[6]);
        if(argc > 7) l6n = atoi(argv[7]);
    }

    signal(2, sig_term);

    printf("loading training data...");
    train_data = fann_read_train_from_file("train.dat");
    test_data = fann_read_train_from_file("test.dat");
    weight_data = fann_merge_train_data(train_data, test_data);
    cln_weight_data = fann_duplicate_train_data(weight_data);
    cln_test_data = fann_duplicate_train_data(test_data);
    cln_train_data = fann_duplicate_train_data(train_data);
    //num_neurons_hidden = atoi(argv[1]);

    y = atoi(argv[2]);
    lay = atoi(argv[1]);
    ln = lay + 2;
    if(lay == 1) y2 = train_data->num_output;
    best_perc = 1;
    printf("\r\ndoing %ux%u [layers=%u,out=%u]", lay, y, ln, train_data->num_output);

    while(true)
    {
        /* pick a random topology and train it from scratch */
        neur1 = 1 + (rand() % y);
        neur2 = 1 + (rand() % y);
        conn_rate = 0.5f + ((rand() % 50) * 0.01f);
        printf("\r\n%2dx%-4d: ", neur1, neur2);

        ann = fann_create_standard(/*conn_rate,*/ ln, train_data->num_input, neur1, neur2, train_data->num_output);
        printf(" [%p] ", (void *)ann);
        if(ann == NULL)
        {
            printf("error");
            exit(0);
        }

        fann_set_activation_function_hidden(ann, FANN_SIGMOID);
        fann_set_activation_function_output(ann, FANN_SIGMOID);
        rebuild_functions(neur1);
        fann_set_training_algorithm(ann, FANN_TRAIN_RPROP);
        fann_set_sarprop_temperature(ann, 15000.0f);
        //fann_randomize_weights(ann, -((rand() % 10) * 0.1f), ((rand() % 10) * 0.1f));
        fann_init_weights(ann, train_data);

        got_inc = 0;
        prev_epoch_mse = 1;

        unsigned last_best_perc_epoch = 0;
        unsigned last_sync_epoch = 0;
        unsigned last_ftest_secs = 0;

        if(good_ann)
            fann_destroy(good_ann);
        good_ann = fann_copy(ann);
        unlink(histfile);

        double prev_test_perc = 0;  /* persists across the epochs of this topology */

        for(u = 0; u < 1000; u++)
        {
            fflush(NULL);
            train_mse = fann_train_epoch(ann, train_data);
            if(jitter_train)
                apply_jjit(train_data, cln_train_data);

            /* progress tick at most once per second */
            if(time(NULL) - last_ftest_secs >= 1)
            {
                printf(".");
                last_ftest_secs = time(NULL);
            }
            ftest_data();
            plot(epochs, train_mse, test_mse);

            /* snapshot the network whenever the test score overtakes the train score */
            if((int)test_perc > (int)train_perc && epochs - last_stat_epoch > 10)
            {
                fann_destroy(good_ann);
                good_ann = fann_copy(ann);
                if(test_perc != prev_test_perc)
                    printf("%.2f [%f]", test_perc, train_mse);
                last_stat_epoch = epochs;
            }
            else if(epochs - last_sync_epoch > 111500)
            {
                last_sync_epoch = epochs;
            }

            if(epochs > 210 && test_perc > best_perc)
            {
                printf(" [saved best %.0f] ", test_perc);
                last_stat_epoch = epochs;
                fann_save(ann, "mutate-best.net");
                best_perc = test_perc;
                printf(" %6.2f [%f]", test_perc, train_mse);
                last_best_perc_epoch = epochs;
            }
            else if(epochs > 11100 && ((int)test_perc <= 63 || (int)test_perc == (int)prev_test_perc))
            {
                printf("done\r\n");
                break;
            }

            static unsigned last_restore_epoch = 0;
            if(epochs > 100 && test_mse - train_mse >= 0.25f && epochs - last_restore_epoch >= 120)
            {
                /* experiment: perturb the RPROP factors when the network overfits
                   (the setters are currently disabled) */
                double rdec, rinc;
                rdec = 0.0101f + ((rand() % 100) * 0.00001f);
                if(!rdec) rdec = 0.01f;
                rinc = 1.0001f + ((rand() % 90) * 0.00001f);
                if(!rinc) rinc = 1.1f;
                //fann_set_rprop_increase_factor(ann, rinc);
                //fann_set_rprop_decrease_factor(ann, rdec);
            }
            else if(test_mse - train_mse <= 0.1f)
            {
                fann_destroy(good_ann);
                good_ann = fann_copy(ann);
            }
            else
            {
                fann_set_sarprop_temperature(ann, fann_get_sarprop_temperature(ann) - 0.0001f);
            }

            static unsigned last_train_change_epoch = 0;
            if(test_mse >= train_mse && epochs - last_train_change_epoch >= 100)
            {
                last_train_change_epoch = epochs;
                //fann_set_training_algorithm(ann, FANN_TRAIN_SARPROP);
                jitter_train = 0;
            }
            else
            {
                //fann_set_training_algorithm(ann, FANN_TRAIN_RPROP);
                jitter_train = 0;
            }

            got_inc = test_perc - prev_epoch_mse;
            prev_epoch_mse = test_perc;
            prev_test_perc = test_perc;
            epochs++;

            if(epochs - last_best_perc_epoch > 511500)
            {
                printf(" failed");
                break;
            }
            if(epochs > 2200 && (int)train_perc < 40)
            {
                printf("skip 1\r\n");
                break;
            }
            if((int)test_perc >= 80)
            {
                printf("\r\ngot it %f\r\n", test_perc);
                fann_save(ann, "good.net");
                exit(0);
            }
        }

        printf(" %6.2f inc: %.2f", test_perc, got_inc);
        fann_destroy(ann);
    }

    fann_destroy_train(train_data);
    fann_destroy_train(test_data);
    fann_destroy(ann);
    return 0;
}