Example #1
/*
 * Internal train function: runs one epoch of SARPROP training
 */
float fann_train_epoch_sarprop(struct fann *ann, struct fann_train_data *data)
{
	unsigned int i;

	if(ann->prev_train_slopes == NULL)
	{
		fann_clear_train_arrays(ann);
	}

	fann_reset_MSE(ann);

	for(i = 0; i < data->num_data; i++)
	{
		fann_run(ann, data->input[i]);
		fann_compute_MSE(ann, data->output[i]);
		fann_backpropagate_MSE(ann);
		fann_update_slopes_batch(ann, ann->first_layer + 1, ann->last_layer - 1);
	}

	fann_update_weights_sarprop(ann, ann->sarprop_epoch, 0, ann->total_connections);

	++(ann->sarprop_epoch);

	return fann_get_MSE(ann);
}
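For context, fann_train_epoch_sarprop is the internal worker that the public fann_train_epoch dispatches to once the training algorithm is set to FANN_TRAIN_SARPROP (available in FANN 2.2 and later). A minimal driver might look like the sketch below; the 2-3-1 layout, the file name "train.data", and the stopping criterion are placeholder assumptions.

/* Hypothetical driver; layout, file name and limits are placeholders. */
#include "fann.h"

int main(void)
{
	struct fann *ann = fann_create_standard(3, 2, 3, 1);
	struct fann_train_data *data = fann_read_train_from_file("train.data");

	fann_set_training_algorithm(ann, FANN_TRAIN_SARPROP);
	for (unsigned int epoch = 0; epoch < 1000; epoch++)
	{
		/* dispatches internally to fann_train_epoch_sarprop() */
		float mse = fann_train_epoch(ann, data);
		if (mse < 0.001f)
			break;
	}

	fann_destroy_train(data);
	fann_destroy(ann);
	return 0;
}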
Example #2
float train_epoch_debug(struct fann *ann, struct fann_train_data* data, unsigned int iter)
{
	unsigned int i;
#if VERBOSE>=2
	static unsigned int j=0;
#endif

#if ! MIMO_FANN
	if (ann->prev_train_slopes==NULL)
		fann_clear_train_arrays(ann);
#endif

	fann_reset_MSE(ann);

	for(i = 0; i < data->num_data; i++)
	{
		fann_run(ann, data->input[i]);
		fann_compute_MSE(ann, data->output[i]);
		fann_backpropagate_MSE(ann);
#if ! MIMO_FANN
		fann_update_slopes_batch(ann, ann->first_layer + 1, ann->last_layer - 1);
#endif

#if VERBOSE>=3
		printf("   ** %d:%d **-AFTER-DELTAS UPDATE-----------------------------------\n", iter, i);
		print_deltas(ann, j++);
#endif

	}
#if VERBOSE>=2
	printf("   ** %d **-BEFORE-WEIGHTS-UPDATE------------------------------------\n", iter);
	print_deltas(ann, j++);
#endif

#if ! MIMO_FANN
#if USE_RPROP
	fann_update_weights_irpropm(ann, 0, ann->total_connections);
#else
	fann_update_weights_batch(ann, data->num_data, 0, ann->total_connections);
#endif
#else /* MIMO_FANN */
	fann_update_weights(ann);
#endif

#if VERBOSE>=1
	printf("   ** %d **-AFTER-WEIGHTS-UPDATE-------------------------------------\n", iter);
	print_deltas(ann, j++);
#endif

	return fann_get_MSE(ann);
}
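This debug variant is configured entirely at compile time: VERBOSE controls how much print_deltas output is produced (per pattern at level 3, before/after the weight update at levels 2 and 1), MIMO_FANN switches between the classic slope-based path and a fann_update_weights entry point, and USE_RPROP picks iRPROP- over a plain batch update. A hypothetical configuration, assuming the surrounding fork defines print_deltas:

/* Hypothetical build configuration; the macro names come from the #if
 * directives above, the chosen values are only an example. */
#define VERBOSE   2   /* 1 = after weight update, 2 = also before, 3 = also per pattern */
#define MIMO_FANN 0   /* 0 = classic FANN slope path, nonzero = MIMO code path */
#define USE_RPROP 1   /* nonzero = fann_update_weights_irpropm, 0 = fann_update_weights_batch */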
Example #3
/*
 * Internal train function: runs one epoch of batch training
 */
float fann_train_epoch_batch(struct fann *ann, struct fann_train_data *data)
{
	unsigned int i;

	fann_reset_MSE(ann);

	for(i = 0; i < data->num_data; i++)
	{
		fann_run(ann, data->input[i]);
		fann_compute_MSE(ann, data->output[i]);
		fann_backpropagate_MSE(ann);
		fann_update_slopes_batch(ann, ann->first_layer + 1, ann->last_layer - 1);
	}

	fann_update_weights_batch(ann, data->num_data, 0, ann->total_connections);

	return fann_get_MSE(ann);
}
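For reference, the update that fann_update_weights_batch applies is plain gradient descent on the accumulated slopes, scaled by learning_rate / num_data; the same rule appears inlined in the parallel batch examples further down. A minimal standalone sketch:

/* Sketch of the effective batch rule; `slopes` holds the per-weight sums
 * accumulated by fann_update_slopes_batch() during the epoch
 * (fann_type is FANN's weight type from fann.h). */
static void batch_update(fann_type *weights, fann_type *slopes,
                         unsigned int num_weights,
                         float learning_rate, unsigned int num_data)
{
	const fann_type epsilon = learning_rate / num_data;
	unsigned int i;
	for (i = 0; i < num_weights; i++)
	{
		weights[i] += slopes[i] * epsilon;
		slopes[i] = 0;	/* clear for the next epoch */
	}
}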
Example #4
float fann_train_outputs_epoch(struct fann *ann, struct fann_train_data *data)
{
	unsigned int i;
	
	fann_reset_MSE(ann);

	for(i = 0; i < data->num_data; i++)
	{
		fann_run(ann, data->input[i]);
		fann_compute_MSE(ann, data->output[i]);
		fann_update_slopes_batch(ann, ann->last_layer - 1, ann->last_layer - 1);
	}

	switch (ann->training_algorithm)
	{
		case FANN_TRAIN_RPROP:
			fann_update_weights_irpropm(ann, (ann->last_layer - 1)->first_neuron->first_con,
										ann->total_connections);
			break;
		case FANN_TRAIN_SARPROP:
			fann_update_weights_sarprop(ann, ann->sarprop_epoch, (ann->last_layer - 1)->first_neuron->first_con,
										ann->total_connections);
			++(ann->sarprop_epoch);
			break;
		case FANN_TRAIN_QUICKPROP:
			fann_update_weights_quickprop(ann, data->num_data,
										  (ann->last_layer - 1)->first_neuron->first_con,
										  ann->total_connections);
			break;
		case FANN_TRAIN_BATCH:
		case FANN_TRAIN_INCREMENTAL:
			fann_error((struct fann_error *) ann, FANN_E_CANT_USE_TRAIN_ALG);
	}

	return fann_get_MSE(ann);
}
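fann_train_outputs_epoch comes from FANN's cascade-correlation code: fann_update_slopes_batch is restricted to the output layer (both bounds are last_layer - 1) and the weight updates start at the output layer's first connection, so only output weights are trained while candidate neurons are handled elsewhere. A sketch of how cascade training is normally driven from user code, with placeholder file name and limits:

/* Hypothetical cascade-training driver; cascade networks use shortcut
 * connectivity, and fann_cascadetrain_on_data() calls routines like
 * fann_train_outputs_epoch() internally. */
struct fann *ann = fann_create_shortcut(2, 2, 1);
struct fann_train_data *data = fann_read_train_from_file("train.data");

fann_cascadetrain_on_data(ann, data,
                          30,      /* max hidden neurons to add */
                          1,       /* neurons between reports */
                          0.001f); /* desired error */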
Example #5
float train_epoch_sarprop_parallel(struct fann *ann, struct fann_train_data *data, const unsigned int threadnumb, vector< vector<fann_type> >& predicted_outputs)
{
	if(ann->prev_train_slopes == NULL)
	{
		fann_clear_train_arrays(ann);
	}

	fann_reset_MSE(ann);
	predicted_outputs.resize(data->num_data, vector<fann_type>(data->num_output));
	vector<struct fann *> ann_vect(threadnumb);
	int i=0, j=0;

	//generate copies of the ann
	omp_set_dynamic(0);
	omp_set_num_threads(threadnumb);
	#pragma omp parallel private(j)
	{
		#pragma omp for schedule(static)
		for(i=0; i<(int)threadnumb; i++)
		{
			ann_vect[i]=fann_copy(ann);
		}

		//parallel computing of the updates
		#pragma omp for schedule(static)
		for(i = 0; i < (int)data->num_data; i++)
		{
			j=omp_get_thread_num();

			fann_type* temp_predicted_output=fann_run(ann_vect[j], data->input[i]);
			for(unsigned int k=0; k<data->num_output; ++k)
			{
				predicted_outputs[i][k]=temp_predicted_output[k];
			}
			fann_compute_MSE(ann_vect[j], data->output[i]);
			fann_backpropagate_MSE(ann_vect[j]);
			fann_update_slopes_batch(ann_vect[j], ann_vect[j]->first_layer + 1, ann_vect[j]->last_layer - 1);
		}
	}

    {
    	fann_type *weights = ann->weights;
    	fann_type *prev_steps = ann->prev_steps;
    	fann_type *prev_train_slopes = ann->prev_train_slopes;
		const unsigned int first_weight=0;
		const unsigned int past_end=ann->total_connections;
		const unsigned int epoch=ann->sarprop_epoch;

    	fann_type next_step;

    	/* These should be set from variables */
    	const float increase_factor = ann->rprop_increase_factor;	/*1.2; */
    	const float decrease_factor = ann->rprop_decrease_factor;	/*0.5; */
    	/* TODO: why is delta_min 0.0 in iRprop? SARPROP uses 1x10^-6 (Braun and Riedmiller, 1993) */
    	const float delta_min = 0.000001f;
    	const float delta_max = ann->rprop_delta_max;	/*50.0; */
    	const float weight_decay_shift = ann->sarprop_weight_decay_shift; /* ld 0.01 = -6.644 */
    	const float step_error_threshold_factor = ann->sarprop_step_error_threshold_factor; /* 0.1 */
    	const float step_error_shift = ann->sarprop_step_error_shift; /* ld 3 = 1.585 */
    	const float T = ann->sarprop_temperature;


    	//merge of MSEs
    	for(i=0;i<(int)threadnumb;++i)
    	{
    		ann->MSE_value+= ann_vect[i]->MSE_value;
    		ann->num_MSE+=ann_vect[i]->num_MSE;
    	}

    	const float MSE = fann_get_MSE(ann);
    	const float RMSE = (float)sqrt(MSE);

    	/* for all weights; TODO: are biases included? */
		omp_set_dynamic(0);
		omp_set_num_threads(threadnumb);
		#pragma omp parallel private(next_step)
		{
			#pragma omp for schedule(static)
				for(i=first_weight; i < (int)past_end; i++)
				{
					/* TODO: confirm whether 1x10^-6 == delta_min is really better */
					const fann_type prev_step  = fann_max(prev_steps[i], (fann_type) 0.000001);	/* prev_step may not be zero because then the training will stop */

					/* calculate SARPROP slope; TODO: better as new error function? (see SARPROP paper)*/

					fann_type temp_slopes=0.0;
					unsigned int k;
					fann_type *train_slopes;
					for(k=0;k<threadnumb;++k)
					{
						train_slopes=ann_vect[k]->train_slopes;
						temp_slopes+= train_slopes[i];
						train_slopes[i]=0.0;
					}
					temp_slopes= -temp_slopes - weights[i] * (fann_type)fann_exp2(-T * epoch + weight_decay_shift);

					next_step=0.0;

					/* TODO: is prev_train_slopes[i] 0.0 in the beginning? */
					const fann_type prev_slope = prev_train_slopes[i];

					const fann_type same_sign = prev_slope * temp_slopes;

					if(same_sign > 0.0)
					{
						next_step = fann_min(prev_step * increase_factor, delta_max);
						/* TODO: are the signs inverted? see differences between SARPROP paper and iRprop */
						if (temp_slopes < 0.0)
							weights[i] += next_step;
						else
							weights[i] -= next_step;
					}
					else if(same_sign < 0.0)
					{
						#ifndef RAND_MAX
						#define	RAND_MAX	0x7fffffff
						#endif
						if(prev_step < step_error_threshold_factor * MSE)
							next_step = prev_step * decrease_factor + (float)rand() / RAND_MAX * RMSE * (fann_type)fann_exp2(-T * epoch + step_error_shift);
						else
							next_step = fann_max(prev_step * decrease_factor, delta_min);

						temp_slopes = 0.0;
					}
					else
					{
						if(temp_slopes < 0.0)
							weights[i] += prev_step;
						else
							weights[i] -= prev_step;
					}

					/* update global data arrays */
					prev_steps[i] = next_step;
					prev_train_slopes[i] = temp_slopes;

				}
		}
    }

	++(ann->sarprop_epoch);

	//destroy the copies of the ann (the MSEs were already merged above)
	for(i=0; i<(int)threadnumb; i++)
	{
		fann_destroy(ann_vect[i]);
	}
	return fann_get_MSE(ann);
}
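One caveat in the example above: the noise term calls rand() inside an OpenMP parallel region, and ISO C does not guarantee rand() is thread-safe. A hedged alternative on POSIX systems is rand_r with a per-thread seed, e.g.:

/* Thread-safe replacement for the (float)rand() / RAND_MAX factor in the
 * SARPROP noise term; assumes POSIX rand_r() and at most 64 threads. */
#include <omp.h>
#include <stdlib.h>
#include <time.h>

static float uniform01_threadsafe(void)
{
	static unsigned int seeds[64];
	unsigned int *seed = &seeds[omp_get_thread_num() % 64];
	if (*seed == 0)
		*seed = (unsigned int)time(NULL) ^ (unsigned int)(omp_get_thread_num() + 1);
	return (float)rand_r(seed) / (float)RAND_MAX;
}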
Example #6
float train_epoch_irpropm_parallel(struct fann *ann, struct fann_train_data *data, const unsigned int threadnumb)
{

	if(ann->prev_train_slopes == NULL)
	{
		fann_clear_train_arrays(ann);
	}

	//#define THREADNUM 1
	fann_reset_MSE(ann);

	vector<struct fann *> ann_vect(threadnumb);
	int i=0,j=0;

	//generate copies of the ann
	omp_set_dynamic(0);
	omp_set_num_threads(threadnumb);
	#pragma omp parallel private(j)
	{

		#pragma omp for schedule(static)
		for(i=0; i<(int)threadnumb; i++)
		{
			ann_vect[i]=fann_copy(ann);
		}

		//parallel computing of the updates
		#pragma omp for schedule(static)
		for(i = 0; i < (int)data->num_data; i++)
		{
			j=omp_get_thread_num();
			fann_run(ann_vect[j], data->input[i]);
			fann_compute_MSE(ann_vect[j], data->output[i]);
			fann_backpropagate_MSE(ann_vect[j]);
			fann_update_slopes_batch(ann_vect[j], ann_vect[j]->first_layer + 1, ann_vect[j]->last_layer - 1);
		}
	}

	{
    	fann_type *weights = ann->weights;
    	fann_type *prev_steps = ann->prev_steps;
    	fann_type *prev_train_slopes = ann->prev_train_slopes;

    	fann_type next_step;

    	const float increase_factor = ann->rprop_increase_factor;	//1.2;
    	const float decrease_factor = ann->rprop_decrease_factor;	//0.5;
    	const float delta_min = ann->rprop_delta_min;	//0.0;
    	const float delta_max = ann->rprop_delta_max;	//50.0;
		const unsigned int first_weight=0;
		const unsigned int past_end=ann->total_connections;

		omp_set_dynamic(0);
		omp_set_num_threads(threadnumb);
		#pragma omp parallel private(next_step)
		{
			#pragma omp for schedule(static)
				for(i=first_weight; i < (int)past_end; i++)
				{

		    		const fann_type prev_step = fann_max(prev_steps[i], (fann_type) 0.0001);	// prev_step may not be zero because then the training will stop

		    		fann_type temp_slopes=0.0;
					unsigned int k;
					fann_type *train_slopes;
					for(k=0;k<threadnumb;++k)
					{
						train_slopes=ann_vect[k]->train_slopes;
						temp_slopes+= train_slopes[i];
						train_slopes[i]=0.0;
					}

		    		const fann_type prev_slope = prev_train_slopes[i];

		    		const fann_type same_sign = prev_slope * temp_slopes;

		    		if(same_sign >= 0.0)
		    			next_step = fann_min(prev_step * increase_factor, delta_max);
		    		else
		    		{
		    			next_step = fann_max(prev_step * decrease_factor, delta_min);
		    			temp_slopes = 0;
		    		}

		    		if(temp_slopes < 0)
		    		{
		    			weights[i] -= next_step;
		    			if(weights[i] < -1500)
		    				weights[i] = -1500;
		    		}
		    		else
		    		{
		    			weights[i] += next_step;
		    			if(weights[i] > 1500)
		    				weights[i] = 1500;
		    		}

		    		// update global data arrays
		    		prev_steps[i] = next_step;
		    		prev_train_slopes[i] = temp_slopes;

				}
			}
	}

	//merge of MSEs
	for(i=0;i<(int)threadnumb;++i)
	{
		ann->MSE_value+= ann_vect[i]->MSE_value;
		ann->num_MSE+=ann_vect[i]->num_MSE;
		fann_destroy(ann_vect[i]);
	}
	return fann_get_MSE(ann);
}
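Given an ann and data set up as in the earlier sketches, a minimal epoch loop for this parallel variant might look as follows; the function name is taken from this example, and the official FANN tree ships similarly named fann_train_epoch_*_parallel helpers in parallel_fann.h:

#include <omp.h>

const unsigned int threads = (unsigned int)omp_get_max_threads();
unsigned int epoch;
for (epoch = 0; epoch < 1000; epoch++)
{
	float mse = train_epoch_irpropm_parallel(ann, data, threads);
	if (mse < 0.001f)
		break;
}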
Example #7
float train_epoch_quickprop_parallel(struct fann *ann, struct fann_train_data *data, const unsigned int threadnumb, vector< vector<fann_type> >& predicted_outputs)
{
	if(ann->prev_train_slopes == NULL)
	{
		fann_clear_train_arrays(ann);
	}

	fann_reset_MSE(ann);
	predicted_outputs.resize(data->num_data, vector<fann_type>(data->num_output));
	vector<struct fann *> ann_vect(threadnumb);
	int i=0, j=0;

	//generate copies of the ann
	omp_set_dynamic(0);
	omp_set_num_threads(threadnumb);
	#pragma omp parallel private(j)
	{
		#pragma omp for schedule(static)
		for(i=0; i<(int)threadnumb; i++)
		{
			ann_vect[i]=fann_copy(ann);
		}

		//parallel computing of the updates
		#pragma omp for schedule(static)
		for(i = 0; i < (int)data->num_data; i++)
		{
			j=omp_get_thread_num();

			fann_type* temp_predicted_output=fann_run(ann_vect[j], data->input[i]);
			for(unsigned int k=0; k<data->num_output; ++k)
			{
				predicted_outputs[i][k]=temp_predicted_output[k];
			}
			fann_compute_MSE(ann_vect[j], data->output[i]);
			fann_backpropagate_MSE(ann_vect[j]);
			fann_update_slopes_batch(ann_vect[j], ann_vect[j]->first_layer + 1, ann_vect[j]->last_layer - 1);
		}
	}

    {
    	fann_type *weights = ann->weights;
    	fann_type *prev_steps = ann->prev_steps;
    	fann_type *prev_train_slopes = ann->prev_train_slopes;
		const unsigned int first_weight=0;
		const unsigned int past_end=ann->total_connections;

    	fann_type w=0.0, next_step;

    	const float epsilon = ann->learning_rate / data->num_data;
    	const float decay = ann->quickprop_decay;	/*-0.0001;*/
    	const float mu = ann->quickprop_mu;	/*1.75; */
    	const float shrink_factor = (float) (mu / (1.0 + mu));

		omp_set_dynamic(0);
		omp_set_num_threads(threadnumb);
		#pragma omp parallel private(w, next_step)
		{
			#pragma omp for schedule(static)
				for(i=first_weight; i < (int)past_end; i++)
				{

					w = weights[i];

					fann_type temp_slopes=0.0;
					unsigned int k;
					fann_type *train_slopes;
					for(k=0;k<threadnumb;++k)
					{
						train_slopes=ann_vect[k]->train_slopes;
						temp_slopes+= train_slopes[i];
						train_slopes[i]=0.0;
					}
					temp_slopes+= decay * w;

					const fann_type prev_step = prev_steps[i];
					const fann_type prev_slope = prev_train_slopes[i];

					next_step = 0.0;


					/* The step must always be in direction opposite to the slope. */
					if(prev_step > 0.001)
					{
						/* If last step was positive...  */
						if(temp_slopes > 0.0) /*  Add in linear term if current slope is still positive. */
							next_step += epsilon * temp_slopes;

						/*If current slope is close to or larger than prev slope...  */
						if(temp_slopes > (shrink_factor * prev_slope))
							next_step += mu * prev_step;	/* Take maximum size negative step. */
						else
							next_step += prev_step * temp_slopes / (prev_slope - temp_slopes);	/* Else, use quadratic estimate. */
					}
					else if(prev_step < -0.001)
					{
						/* If last step was negative...  */
						if(temp_slopes < 0.0) /*  Add in linear term if current slope is still negative. */
							next_step += epsilon * temp_slopes;

						/* If current slope is close to or more neg than prev slope... */
						if(temp_slopes < (shrink_factor * prev_slope))
							next_step += mu * prev_step;	/* Take maximum size negative step. */
						else
							next_step += prev_step * temp_slopes / (prev_slope - temp_slopes);	/* Else, use quadratic estimate. */
					}
					else /* Last step was zero, so use only linear term. */
						next_step += epsilon * temp_slopes;

					/* update global data arrays */
					prev_steps[i] = next_step;
					prev_train_slopes[i] = temp_slopes;

					w += next_step;

					if(w > 1500)
						weights[i] = 1500;
					else if(w < -1500)
						weights[i] = -1500;
					else
						weights[i] = w;
				}
		}
	}
	//merge of MSEs
	for(i=0;i<(int)threadnumb;++i)
	{
		ann->MSE_value+= ann_vect[i]->MSE_value;
		ann->num_MSE+=ann_vect[i]->num_MSE;
		fann_destroy(ann_vect[i]);
	}
	return fann_get_MSE(ann);
}
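The branch structure above is Fahlman's quickprop rule: treat the error as locally quadratic in each weight, fit a parabola through the last two slope measurements S_prev and S, and jump toward its minimum, which gives a step of prev_step * S / (S_prev - S); the shrink_factor test caps that jump at mu times the previous step. The core step in isolation:

/* Sketch of the per-weight quickprop step mirrored from the loop above;
 * epsilon = learning_rate / num_data as computed there. */
static fann_type quickprop_step(fann_type prev_step, fann_type slope,
                                fann_type prev_slope, float mu, float epsilon)
{
	const float shrink_factor = (float)(mu / (1.0 + mu));
	fann_type step = 0;

	if (prev_step > 0.001)
	{
		if (slope > 0.0)	/* slope still positive: add linear term */
			step += epsilon * slope;
		if (slope > shrink_factor * prev_slope)
			step += mu * prev_step;	/* parabola unreliable: max-size step */
		else
			step += prev_step * slope / (prev_slope - slope);	/* parabola minimum */
	}
	else if (prev_step < -0.001)
	{
		if (slope < 0.0)
			step += epsilon * slope;
		if (slope < shrink_factor * prev_slope)
			step += mu * prev_step;
		else
			step += prev_step * slope / (prev_slope - slope);
	}
	else	/* last step was ~zero: plain gradient term only */
		step += epsilon * slope;

	return step;
}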
Example #8
float train_epoch_batch_parallel(struct fann *ann, struct fann_train_data *data, const unsigned int threadnumb,vector< vector<fann_type> >& predicted_outputs)
{
	fann_reset_MSE(ann);
	predicted_outputs.resize(data->num_data,vector<fann_type> (data->num_output));
	vector<struct fann *> ann_vect(threadnumb);
	int i=0,j=0;

	//generate copies of the ann
	omp_set_dynamic(0);
	omp_set_num_threads(threadnumb);
	#pragma omp parallel private(j)
	{

		#pragma omp for schedule(static)
		for(i=0; i<(int)threadnumb; i++)
		{
			ann_vect[i]=fann_copy(ann);
		}

		//parallel computing of the updates
		#pragma omp for schedule(static)
		for(i = 0; i < (int)data->num_data; i++)
		{
			j=omp_get_thread_num();

			fann_type* temp_predicted_output=fann_run(ann_vect[j], data->input[i]);
			for(unsigned int k=0;k<data->num_output;++k)
			{
				predicted_outputs[i][k]=temp_predicted_output[k];
			}

			fann_compute_MSE(ann_vect[j], data->output[i]);
			fann_backpropagate_MSE(ann_vect[j]);
			fann_update_slopes_batch(ann_vect[j], ann_vect[j]->first_layer + 1, ann_vect[j]->last_layer - 1);
		}
	}

    //parallel update of the weights
	{
		const unsigned int num_data=data->num_data;
		const unsigned int first_weight=0;
		const unsigned int past_end=ann->total_connections;
		fann_type *weights = ann->weights;
		const fann_type epsilon = ann->learning_rate / num_data;
		omp_set_dynamic(0);
		omp_set_num_threads(threadnumb);
		#pragma omp parallel
		{
			#pragma omp for schedule(static)
				for(i=first_weight; i < (int)past_end; i++)
				{
					fann_type temp_slopes=0.0;
					unsigned int k;
					fann_type *train_slopes;
					for(k=0;k<threadnumb;++k)
					{
						train_slopes=ann_vect[k]->train_slopes;
						temp_slopes+= train_slopes[i];
						train_slopes[i]=0.0;
					}
					weights[i] += temp_slopes*epsilon;
				}
			}
	}
	//merge of MSEs
	for(i=0;i<(int)threadnumb;++i)
	{
		ann->MSE_value+= ann_vect[i]->MSE_value;
		ann->num_MSE+=ann_vect[i]->num_MSE;
		fann_destroy(ann_vect[i]);
	}
	return fann_get_MSE(ann);
}
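The MSE merge at the end works because FANN keeps the error as a running sum: MSE_value accumulates squared error and num_MSE counts the error terms, and fann_get_MSE() returns their quotient. Summing both fields over the thread-local copies therefore reproduces what a serial pass would compute; conceptually:

/* Conceptual equivalent of the merge loop plus fann_get_MSE() above. */
static float merged_mse(struct fann **ann_vect, unsigned int threadnumb)
{
	float sum_sq = 0.0f;
	unsigned int n = 0, t;
	for (t = 0; t < threadnumb; t++)
	{
		sum_sq += ann_vect[t]->MSE_value;	/* summed squared errors */
		n += ann_vect[t]->num_MSE;		/* number of terms in the sum */
	}
	return n ? sum_sq / (float)n : 0.0f;
}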
Example #9
/*
 * Internal train function: one iRPROP- epoch (OpenCL-accelerated variant)
 */
float fann_train_epoch_irpropm(struct fann *ann, struct fann_train_data *data, struct fpts_cl *fptscl)
{
	fptsclglob = fptscl;
	unsigned int i, count;
	/* route crashes and interrupts through sigfunc for diagnostics */
	signal(SIGSEGV, sigfunc);
	signal(SIGFPE, sigfunc);
	signal(SIGINT, sigfunc);
	signal(SIGTERM, sigfunc);
	signal(SIGHUP, sigfunc);
	signal(SIGABRT, sigfunc);
	cl_int err;
	size_t truesize;
	if(ann->prev_train_slopes == NULL)
	{
		fann_clear_train_arrays(ann, fptscl);
	}
	fann_reset_MSE(ann);
	fann_type val = 0.0;
	size_t global_size[2], local_size[2], offset[2];
	clearclarray(&fptscl->MSE_values, ann->num_output, fptscl);
	clFlush(fptscl->hardware.queue);
	//clFinish(fptscl->hardware.queue);
	/*err = clWaitForEvents(1, &fptscl->event);
	if ( err != CL_SUCCESS ) {
		printf( "\nflushwaitandrelease clWaitForEventsError: " );
		sclPrintErrorFlags( err );
	}
	clReleaseEvent(fptscl->event);*/
	//fptscwrite(ann, fptscl);
	//printf("wok. Enter RPROP train. fptscl->software_mulsum = %s, %d\n", fptscl->software_mulsum.kernelName, fptscl->hardware.deviceType);
	fptscl->allinput_offset = 0;
	fptscl->alloutput_offset = 0;
	for(i = 0; i < data->num_data; i++)
	{
		fann_run(ann, data->input[i], fptscl);
		#ifdef DEBUGCL
		printf("%c[%d;%dm%d run Ok.%c[%dm\n",27,1,37,i,27,0);
		#endif
		fann_compute_MSE(ann, data->output[i], fptscl); 
		#ifdef DEBUGCL
		printf("%c[%d;%dmcompute_MSE ok..%c[%dm\n",27,1,37,27,0);
		//if(i>=18) sigfunc (0);
		#endif
		sigfunc (0);	/* unconditional call into the signal handler; likely leftover debugging */
		fann_backpropagate_MSE(ann, fptscl); 
		#ifdef DEBUGCL
		printf("%c[%d;%dmbackpropagate_MSE ok..%c[%dm\n",27,1,37,27,0);															//1!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
		#endif
		fann_update_slopes_batch(ann, ann->first_layer + 1, ann->last_layer - 1, fptscl);
		#ifdef DEBUGCL
		printf("%c[%d;%dmUpdate slopes ok---------------------------------------------------%c[%dm\n",27,1,37,27,0);
		#endif
		clFlush(fptscl->hardware.queue);
	}
	fann_update_weights_irpropm(ann, 0, ann->total_connections, fptscl);
	//sigfunc (0);
#ifdef DEBUGCL
/*	err = clGetCommandQueueInfo(fptscl->hardware.queue, CL_QUEUE_REFERENCE_COUNT, sizeof(count), &count, NULL);
	if ( err != CL_SUCCESS ) {
		printf( "\nflushwaitandrelease clGetCommandQueueInfo Error: " );
		sclPrintErrorFlags( err );
	}
	printf("CL_QUEUE_REFERENCE_COUNT = %d\n", count);*/
#endif
	//fptscread(ann, fptscl); //for debug
#ifndef DEBUGCL
	return fann_get_MSEcl(ann, fptscl);
#else
	printf("%c[%d;%dmMostly end of epoch, update_weights_irpropm OK.------------------------------------------%c[%dm\n",27,1,37,27,0);
	return fann_get_MSE(ann);
#endif
}
Example #10
FANN_EXTERNAL float FANN_API fann_train_epoch_batch_parallel(struct fann *ann, struct fann_train_data *data, const unsigned int threadnumb)
{
	/*vector<struct fann *> ann_vect(threadnumb);*/
	struct fann** ann_vect= (struct fann**) malloc(threadnumb * sizeof(struct fann*));
	int i=0,j=0;
	fann_reset_MSE(ann);

	//generate copies of the ann
	omp_set_dynamic(0);
	omp_set_num_threads(threadnumb);
	#pragma omp parallel private(j)
	{

		#pragma omp for schedule(static)
		for(i=0; i<(int)threadnumb; i++)
		{
			ann_vect[i]=fann_copy(ann);
		}

		//parallel computing of the updates
		#pragma omp for schedule(static)
		for(i = 0; i < (int)data->num_data; i++)
		{
			j=omp_get_thread_num();
			if (ann->do_dropout) {
				fann_run_dropout(ann_vect[j], data->input[i]);
			}
			else {
				fann_run(ann_vect[j], data->input[i]);
			}
			fann_compute_MSE(ann_vect[j], data->output[i]);
			fann_backpropagate_MSE(ann_vect[j]);
			fann_update_slopes_batch(ann_vect[j], ann_vect[j]->first_layer + 1, ann_vect[j]->last_layer - 1);
		}
	}

    //parallel update of the weights
	{
		const unsigned int num_data=data->num_data;
		const unsigned int first_weight=0;
		const unsigned int past_end=ann->total_connections;
		fann_type *weights = ann->weights;
		const fann_type epsilon = ann->learning_rate / num_data;
		omp_set_dynamic(0);
		omp_set_num_threads(threadnumb);
		#pragma omp parallel
		{
			#pragma omp for schedule(static)
				for(i=first_weight; i < (int)past_end; i++)
				{
					fann_type temp_slopes=0.0;
					unsigned int k;
					fann_type *train_slopes;
					for(k=0;k<threadnumb;++k)
					{
						train_slopes=ann_vect[k]->train_slopes;
						temp_slopes+= train_slopes[i];
						train_slopes[i]=0.0;
					}
					weights[i] += temp_slopes*epsilon;
				}
			}
	}
	//merge of MSEs
	for(i=0;i<(int)threadnumb;++i)
	{
		ann->MSE_value+= ann_vect[i]->MSE_value;
		ann->num_MSE+=ann_vect[i]->num_MSE;
		fann_destroy(ann_vect[i]);
	}
	free(ann_vect);
	return fann_get_MSE(ann);
}
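Because this last variant is exported (FANN_EXTERNAL/FANN_API), it can be called directly from user code. A sketch, assuming a FANN build with OpenMP and the parallel extensions compiled in; the layout and file name are placeholders:

#include <omp.h>
#include "fann.h"

struct fann *ann = fann_create_standard(3, 2, 8, 1);
struct fann_train_data *data = fann_read_train_from_file("train.data");
unsigned int epoch;

for (epoch = 0; epoch < 500; epoch++)
{
	float mse = fann_train_epoch_batch_parallel(ann, data,
			(unsigned int)omp_get_max_threads());
	if (mse < 0.001f)
		break;
}
fann_destroy_train(data);
fann_destroy(ann);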