Example #1
int rand_pickf
( float *p,
  int n
)
{ 
  double t, r;
  int i;

  t = 0;
  for (i = 0; i<n; i++)
  { if (p[i]<=0) abort();
    t += p[i];
  }

  if (t<=0) abort();

  r = t * rand_uniform();

  for (i = 0; i<n; i++)
  { r -= p[i];
    if (r<0) return i;
  }

  /* Return value with non-zero probability if we get here due to roundoff. */

  for (i = 0; i<n; i++) 
  { if (p[i]>0) return i;
  }

  abort(); 
}
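
Every example in this collection leans on some project-local rand_uniform(); the zero-argument form used above is assumed to return a double uniform on [0, 1). A minimal stand-in for readers who want to compile the excerpts might look like the sketch below (the body is an assumption, not the original library's code):

#include <stdlib.h>

/* Hypothetical stand-in for the zero-argument rand_uniform() used above:
   a double uniform on [0, 1), never exactly 1.0. */
static double rand_uniform(void)
{
    return rand() / ((double) RAND_MAX + 1.0);
}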
Example #2
float get_current_rate(network net)
{
    size_t batch_num = get_current_batch(net);
    int i;
    float rate;
    if (batch_num < net.burn_in) return net.learning_rate * pow((float)batch_num / net.burn_in, net.power);
    switch (net.policy) {
        case CONSTANT:
            return net.learning_rate;
        case STEP:
            return net.learning_rate * pow(net.scale, batch_num/net.step);
        case STEPS:
            rate = net.learning_rate;
            for(i = 0; i < net.num_steps; ++i){
                if(net.steps[i] > batch_num) return rate;
                rate *= net.scales[i];
                //if(net.steps[i] > batch_num - 1 && net.scales[i] > 1) reset_momentum(net);
            }
            return rate;
        case EXP:
            return net.learning_rate * pow(net.gamma, batch_num);
        case POLY:
            return net.learning_rate * pow(1 - (float)batch_num / net.max_batches, net.power);
        case RANDOM:
            return net.learning_rate * pow(rand_uniform(0,1), net.power);
        case SIG:
            return net.learning_rate * (1./(1.+exp(net.gamma*(batch_num - net.step))));
        default:
            fprintf(stderr, "Policy is weird!\n");
            return net.learning_rate;
    }
}
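
Note that batch_num/net.step under the STEP policy is integer division, so the rate decays in discrete plateaus rather than continuously. A small worked sketch (the learning_rate, scale, and step values here are hypothetical):

#include <math.h>
#include <stdio.h>

int main(void)
{
    float learning_rate = 0.1f, scale = 0.1f;  /* hypothetical values */
    size_t step = 1000, batch_num = 2500;
    /* 2500/1000 == 2 under integer division, so every batch in
       [2000, 2999] gets rate = 0.1 * 0.1^2 = 0.001 */
    printf("%f\n", learning_rate * pow(scale, batch_num / step));
    return 0;
}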
Example #3
void restart (float ** simplex, float * response, 
	      float * step_size)
{
  const float STEP_FACTOR = 0.9;
  int i, j;
  int worst, next, best;
  float minval, maxval;


  /* find the current best vertex */
  eval_vertices (response, &worst, &next, &best); 

  /* set the first vertex to the current best */
  for (i = 0; i < DIMENSION;  i++)
    simplex[0][i] = simplex[best][i];

  /* decrease step size */
  for (i = 0;  i < DIMENSION;  i++)
    step_size[i] *= STEP_FACTOR;

  /* set up remaining vertices of simplex using new step size */
  for (i = 1;  i < DIMENSION+1;  i++)
    for (j = 0;  j < DIMENSION;  j++)
      {
	minval = simplex[0][j] - step_size[j];
	maxval = simplex[0][j] + step_size[j];
	simplex[i][j] = rand_uniform (minval, maxval);
      }

  /* initialize response for each vector */
  for (i = 0;  i < DIMENSION+1;  i++)
    response[i] = calc_error (simplex[i]);
}
Example #4
local_layer make_local_layer(int batch, int h, int w, int c, int n, int size,
		int stride, int pad, ACTIVATION activation) {
	int i;
	local_layer l = { 0 };
	l.type = LOCAL;

	l.h = h;
	l.w = w;
	l.c = c;
	l.n = n;
	l.batch = batch;
	l.stride = stride;
	l.size = size;
	l.pad = pad;

	int out_h = local_out_height(l);
	int out_w = local_out_width(l);
	int locations = out_h * out_w;
	l.out_h = out_h;
	l.out_w = out_w;
	l.out_c = n;
	l.outputs = l.out_h * l.out_w * l.out_c;
	l.inputs = l.w * l.h * l.c;

	l.weights = calloc(c * n * size * size * locations, sizeof(float));
	l.weight_updates = calloc(c * n * size * size * locations, sizeof(float));

	l.biases = calloc(l.outputs, sizeof(float));
	l.bias_updates = calloc(l.outputs, sizeof(float));

	// float scale = 1./sqrt(size*size*c);
	float scale = sqrt(2. / (size * size * c));
	for (i = 0; i < c * n * size * size * locations; ++i) /* fill every allocated weight: one filter set per output location */
		l.weights[i] = scale * rand_uniform(-1, 1);

	l.col_image = calloc(out_h * out_w * size * size * c, sizeof(float));
	l.output = calloc(l.batch * out_h * out_w * n, sizeof(float));
	l.delta = calloc(l.batch * out_h * out_w * n, sizeof(float));

#ifdef GPU
	l.weights_gpu = cuda_make_array(l.weights, c*n*size*size*locations);
	l.weight_updates_gpu = cuda_make_array(l.weight_updates, c*n*size*size*locations);

	l.biases_gpu = cuda_make_array(l.biases, l.outputs);
	l.bias_updates_gpu = cuda_make_array(l.bias_updates, l.outputs);

	l.col_image_gpu = cuda_make_array(l.col_image, out_h*out_w*size*size*c);
	l.delta_gpu = cuda_make_array(l.delta, l.batch*out_h*out_w*n);
	l.output_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);

#endif
	l.activation = activation;

	fprintf(stderr,
			"Local Layer: %d x %d x %d image, %d filters -> %d x %d x %d image\n",
			h, w, c, n, out_h, out_w, n);

	return l;
}
Example #5
double rand_gaussian (void)
{
  double a, b;

  a = rand_uniform();
  b = rand_uniopen();

  return cos(2.0*M_PI*a) * sqrt(-2.0*log(b));
}
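
The use of rand_uniopen() for b is deliberate: Box-Muller takes log(b), so b must be strictly positive. A plausible sketch of such an open-interval generator (the name matches the call above, but this body is an assumption):

#include <stdlib.h>

/* Uniform on the open interval (0, 1): the +0.5 offset keeps the result
   strictly away from both endpoints, so log() always stays finite. */
static double rand_uniopen(void)
{
    return ((double) rand() + 0.5) / ((double) RAND_MAX + 1.0);
}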
Example #6
convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation)
{
    int i;
    convolutional_layer l = {0};
    l.type = CONVOLUTIONAL;

    l.h = h;
    l.w = w;
    l.c = c;
    l.n = n;
    l.batch = batch;
    l.stride = stride;
    l.size = size;
    l.pad = pad;

    l.filters = calloc(c*n*size*size, sizeof(float));
    l.filter_updates = calloc(c*n*size*size, sizeof(float));

    l.biases = calloc(n, sizeof(float));
    l.bias_updates = calloc(n, sizeof(float));
    //float scale = 1./sqrt(size*size*c);
    float scale = sqrt(2./(size*size*c));
    printf("%f\n", scale);
    for(i = 0; i < c*n*size*size; ++i) l.filters[i] = 2*scale*rand_uniform() - scale;
    for(i = 0; i < n; ++i){
        l.biases[i] = scale;
    }
    int out_h = convolutional_out_height(l);
    int out_w = convolutional_out_width(l);
    l.out_h = out_h;
    l.out_w = out_w;
    l.out_c = n;
    l.outputs = l.out_h * l.out_w * l.out_c;
    l.inputs = l.w * l.h * l.c;

    l.col_image = calloc(out_h*out_w*size*size*c, sizeof(float));
    l.output = calloc(l.batch*out_h * out_w * n, sizeof(float));
    l.delta  = calloc(l.batch*out_h * out_w * n, sizeof(float));

    #ifdef GPU
    l.filters_gpu = cuda_make_array(l.filters, c*n*size*size);
    l.filter_updates_gpu = cuda_make_array(l.filter_updates, c*n*size*size);

    l.biases_gpu = cuda_make_array(l.biases, n);
    l.bias_updates_gpu = cuda_make_array(l.bias_updates, n);

    l.col_image_gpu = cuda_make_array(l.col_image, out_h*out_w*size*size*c);
    l.delta_gpu = cuda_make_array(l.delta, l.batch*out_h*out_w*n);
    l.output_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);
    #endif
    l.activation = activation;

    fprintf(stderr, "Convolutional Layer: %d x %d x %d image, %d filters -> %d x %d x %d image\n", h,w,c,n, out_h, out_w, n);

    return l;
}
Example #7
void forward_dropout_layer(dropout_layer l, network_state state)
{
    int i;
    if (!state.train) return;
    for(i = 0; i < l.batch * l.inputs; ++i){
        float r = rand_uniform();
        l.rand[i] = r;
        if(r < l.probability) state.input[i] = 0;
        else state.input[i] *= l.scale;
    }
}
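
The multiplication by l.scale implements inverted dropout: surviving activations are scaled up during training so no rescaling is needed at inference. Assuming the layer's constructor derives the scale from the drop probability, the relationship would be (a sketch, not quoted from the source):

/* Inverted-dropout scale: with drop probability p, survivors are
   multiplied by 1/(1-p) so the expected activation is unchanged. */
float dropout_scale(float probability)
{
    return 1.0f / (1.0f - probability);
}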
Example #8
void simplex_initialize (float * parameters, float ** simplex, 
			 float * response, float * step_size)
{
  int i, j;
  int worst, next, best;
  float resp;
  float minval, maxval;


  for (j = 0;  j < DIMENSION;  j++)
    {
      simplex[0][j] = parameters[j];
      step_size[j] = 0.5 * parameters[j];
    }

  for (i = 1;  i < DIMENSION+1;  i++)
    for (j = 0;  j < DIMENSION;  j++)
      {
	minval = simplex[0][j] - step_size[j];
	maxval = simplex[0][j] + step_size[j];
	simplex[i][j] = rand_uniform (minval, maxval);
      }

  for (i = 0;  i < DIMENSION+1;  i++)
      response[i] = calc_error (simplex[i]);

  for (i = 1;  i < 500;  i++)
    {
      for (j = 0;  j < DIMENSION;  j++)
	{
	  minval = simplex[0][j] - step_size[j];
	  maxval = simplex[0][j] + step_size[j];
	  parameters[j] = rand_uniform (minval, maxval);
	}

      resp = calc_error (parameters);
      eval_vertices (response, &worst, &next, &best);
      if (resp < response[worst])
	replace (simplex, response, worst, parameters, resp);
    }
}
Example #9
int sample_array(float *a, int n)
{
    float sum = sum_array(a, n);
    scale_array(a, n, 1./sum);
    float r = rand_uniform(0, 1);
    int i;
    for(i = 0; i < n; ++i){
        r = r - a[i];
        if (r <= 0) return i;
    }
    return n-1;
}
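
One caveat worth noting: sample_array() normalizes a in place through scale_array(), so the caller's weight array is modified. A usage sketch with hypothetical weights:

/* Hypothetical usage: after the call, w sums to 1. */
void sample_array_demo(void)
{
    float w[3] = {2.0f, 1.0f, 1.0f};
    int idx = sample_array(w, 3);  /* 0 with prob 0.5; 1 or 2 with prob 0.25 each */
    (void) idx;  /* suppress unused-variable warning in this sketch */
}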
Example #10
static int init_gr()
        {
        int jj;
        int group;

        hav=muste_fopen(tempfile,"r+b");
        for (jj=0L; jj<n; ++jj)
            {
            group=ng*rand_uniform()+1;
            hav_write1(jj,&group);
            }
        muste_fclose(hav);
        return(1);
        }
Example #11
double prob(int n_planets, input_orbit* io, double sigma_i) {
  double sum = 0;
  orbit o[MAX_PLANETS];
  planet_ellipse pe[MAX_PLANETS];
  for (int i = 0; i < NTRIALS; i++) {
    for (int j = 0; j < n_planets; j++) {
      io[j].i     = rand_Rayleigh (sigma_i) / RAD_TO_DEG;
      io[j].Omega = rand_uniform  (2 * PI); 
      o[j]        = input_orbit_to_orbit (io[j]);
      o[j].use    = 1;
      pe[j]  = convert (o[j]);
    }
    // as e = 0 for all planets, values are EXACT
    sum += prob_of_transits_approx (n_planets, pe);
  }
  return sum / NTRIALS;
}
Example #12
connected_layer make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation)
{
    int i;
    connected_layer l = {0};
    l.type = CONNECTED;

    l.inputs = inputs;
    l.outputs = outputs;
    l.batch=batch;

    l.output = calloc(batch*outputs, sizeof(float));  /* elements are float, not float* */
    l.delta = calloc(batch*outputs, sizeof(float));

    l.weight_updates = calloc(inputs*outputs, sizeof(float));
    l.bias_updates = calloc(outputs, sizeof(float));

    l.weights = calloc(outputs*inputs, sizeof(float));
    l.biases = calloc(outputs, sizeof(float));


    //float scale = 1./sqrt(inputs);
    float scale = sqrt(2./inputs);
    for(i = 0; i < outputs*inputs; ++i){
        l.weights[i] = scale*rand_uniform(-1, 1);
    }

    for(i = 0; i < outputs; ++i){
        l.biases[i] = scale;
    }

#ifdef GPU
    l.weights_gpu = cuda_make_array(l.weights, outputs*inputs);
    l.biases_gpu = cuda_make_array(l.biases, outputs);

    l.weight_updates_gpu = cuda_make_array(l.weight_updates, outputs*inputs);
    l.bias_updates_gpu = cuda_make_array(l.bias_updates, outputs);

    l.output_gpu = cuda_make_array(l.output, outputs*batch);
    l.delta_gpu = cuda_make_array(l.delta, outputs*batch);
#endif
    l.activation = activation;
    fprintf(stderr, "Connected Layer: %d inputs, %d outputs\n", inputs, outputs);
    return l;
}
Example #13
double prob(double sigma_i)
{
    double sum = 0;
    for (int i = 0; i < NTRIALS; i++)
    {
        //        for (int k = 0; k < n_planets; k++)
        //  {
        for (int j = 0; j < n_planets; j++)
        {
            IO90[j].i     = rand_Rayleigh (sigma_i) / RAD_TO_DEG;
            IO90[j].Omega = rand_uniform  (2 * PI);
            O90[j]        = input_orbit_to_orbit (IO90[j]);
            O90[j].use    = use[j]; // (int) (k != j);
            planets90[j]  = convert (O90[j]);
        }
        // as e = 0 for all planets, values are EXACT
        sum += prob_of_transits_approx (n_planets, planets90);
        //        }
    }
    return sum / NTRIALS;
}
Example #14
void test_char_rnn(char *cfgfile, char *weightfile, int num, char *seed, float temp, int rseed)
{
    srand(rseed);
    char *base = basecfg(cfgfile);
    fprintf(stderr, "%s\n", base);

    network net = parse_network_cfg(cfgfile);
    if(weightfile){
        load_weights(&net, weightfile);
    }
    int inputs = get_network_input_size(net);

    int i, j;
    for(i = 0; i < net.n; ++i) net.layers[i].temperature = temp;
    unsigned char c;
    int len = strlen(seed);
    float *input = calloc(inputs, sizeof(float));
    for(i = 0; i < len-1; ++i){
        c = seed[i];
        input[(int)c] = 1;
        network_predict(net, input);
        input[(int)c] = 0;
        printf("%c", c);
    }
    c = seed[len-1];
    for(i = 0; i < num; ++i){
        printf("%c", c);
        float r = rand_uniform(0,1);
        float sum = 0;
        input[(int)c] = 1;
        float *out = network_predict(net, input);
        input[(int)c] = 0;
        for(j = 0; j < inputs; ++j){
            sum += out[j];
            if(sum > r) break;
        }
        c = j;
    }
    printf("\n");
}
Example #15
// Returns a Gaussian-distributed random number using the Box-Muller method:
// http://en.wikipedia.org/wiki/Box_muller#Basic_form (note: second value discarded)
double rand_gaussian() {
	double u1 = rand_uniform(), u2 = rand_uniform();
	return sqrt(-2*log(u1))*cos(2*M_PI*u2);
}
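
As the comment says, Box-Muller produces two independent Gaussians per pair of uniforms; the sin term is simply discarded here. A sketch of the two-output form (note it assumes rand_uniform() stays strictly inside (0, 1), since log(0) would diverge):

#include <math.h>

/* Both Box-Muller outputs (sketch); rand_uniform() is assumed to
   return values strictly inside (0, 1) so log(u1) stays finite. */
void rand_gaussian_pair(double *z0, double *z1)
{
	double u1 = rand_uniform(), u2 = rand_uniform();
	double r = sqrt(-2*log(u1));
	*z0 = r*cos(2*M_PI*u2);
	*z1 = r*sin(2*M_PI*u2);
}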
Example #16
void test_recursive_splitting_degenerate_perturbed() {

	// recursively splits a polyhedron (starting from a tet) with a cut plane
	// that is always degenerate with the tet in some way,
	// checking to see that the resulting volumes add up properly.
	// In this one, the cut plane is perturbed by a small amount.

#define MIN_PERTURB_ORDER (-17)
#define MAX_PERTURB_ORDER (-1)

	// explicit stack-based implementation
	r3d_int nstack, depth, t, chopt, m;
	r3d_poly polystack[STACK_SIZE];
	r3d_int depthstack[STACK_SIZE];

	// variables: the polyhedra and their moments
	r3d_rvec3 verts[4];
	r3d_plane splane;
	r3d_poly opoly, poly1, poly2;
	r3d_real om[R3D_NUM_MOMENTS(POLY_ORDER)], m1[R3D_NUM_MOMENTS(POLY_ORDER)], m2[R3D_NUM_MOMENTS(POLY_ORDER)];
	r3d_real perturb;

	// do many trials
	printf("Recursively splitting %d tetrahedra, maximum splits per tet is %d.\n", NUM_TRIALS, MAX_DEPTH);
	for(t = 0; t < NUM_TRIALS; ++t) {

		// compute the order of magnitude by which to perturb the clip plane,
		// determined by the trial number
		perturb = pow(10, MIN_PERTURB_ORDER + t%(MAX_PERTURB_ORDER - MIN_PERTURB_ORDER));
		//printf("omag = %d, pow = %.10e\n", MIN_PERTURB_ORDER + t%(MAX_PERTURB_ORDER - MIN_PERTURB_ORDER), perturb);

		// generate a random tet
		rand_tet_3d(verts, MIN_VOL);
		r3d_init_tet(&opoly, verts);
	
		// push the starting tet to the stack
		nstack = 0;
		polystack[nstack] = opoly;
		depthstack[nstack] = 0;
		++nstack;	
	
		// recursively split the poly
		while(nstack > 0) {
	
			// pop the stack
			--nstack;
			opoly = polystack[nstack];
			depth = depthstack[nstack];
	
			// generate a random plane from one of a few
			// possible degenerate configurations, ensuring that it
			// has a valid unit normal
			chopt = rand_int(6);
			do {
				splane = choptions_3d[chopt](&opoly);
			} while(splane.n.x == 0.0 && splane.n.y == 0.0 && splane.n.z == 0.0);

			// randomly perturb the plane
			splane.n.x *= 1.0 + perturb*(rand_uniform() - 0.5);
			splane.n.y *= 1.0 + perturb*(rand_uniform() - 0.5);
			splane.n.z *= 1.0 + perturb*(rand_uniform() - 0.5);
			splane.d *= 1.0 + perturb*(rand_uniform() - 0.5);
	
			// split the poly by making two copies of the original poly
			// and then clipping them against the same plane, with one
			// oriented oppositely
			poly1 = opoly;
			poly2 = opoly;
			r3d_clip(&poly1, &splane, 1);
			splane.n.x *= -1;
			splane.n.y *= -1;
			splane.n.z *= -1;
			splane.d *= -1;
			r3d_clip(&poly2, &splane, 1);

			// reduce the original and its two parts
			r3d_reduce(&opoly, om, POLY_ORDER);
			r3d_reduce(&poly1, m1, POLY_ORDER);
			r3d_reduce(&poly2, m2, POLY_ORDER);
			
			// make sure the sum of moments equals the original 
			for(m = 0; m < R3D_NUM_MOMENTS(POLY_ORDER); ++m) {
				ASSERT_EQ(om[m], m1[m] + m2[m], TOL_FAIL);
				EXPECT_EQ(om[m], m1[m] + m2[m], TOL_WARN);
			}
		
			// make sure neither of the two resulting volumes is larger than the original
			// (within some tolerance)
			ASSERT_LT(m1[0], om[0]*(1.0 + TOL_FAIL));
			EXPECT_LT(m1[0], om[0]*(1.0 + TOL_WARN));
			ASSERT_LT(m2[0], om[0]*(1.0 + TOL_FAIL));
			EXPECT_LT(m2[0], om[0]*(1.0 + TOL_WARN));
	
			//printf("nstack = %d, depth = %d, opoly = %.10e, p1 = %.10e, p2 = %.10e, err = %.10e\n", 
					//nstack, depth, om[0], m1[0], m2[0], fabs(1.0 - om[0]/(m1[0] + m2[0])));
	
			// push the children to the stack if they have
			// an acceptably large volume
			if(depth < MAX_DEPTH) {
				if(m1[0] > MIN_VOL) {
					polystack[nstack] = poly1;
					depthstack[nstack] = depth + 1;
					++nstack;	
				}
				if(m2[0] > MIN_VOL) {
					polystack[nstack] = poly2;
					depthstack[nstack] = depth + 1;
					++nstack;	
				}
			}
		}
	}
}
Example #17
void run_nightmare(int argc, char **argv)
{
    srand(0);
    if(argc < 6){  /* cfg, weights, image, and layer are all required */
        fprintf(stderr, "usage: %s %s [cfg] [weights] [image] [layer] [options! (optional)]\n", argv[0], argv[1]);
        return;
    }

    char *cfg = argv[2];
    char *weights = argv[3];
    char *input = argv[4];
    int max_layer = atoi(argv[5]);

    int range = find_int_arg(argc, argv, "-range", 1);
    int norm = find_int_arg(argc, argv, "-norm", 1);
    int rounds = find_int_arg(argc, argv, "-rounds", 1);
    int iters = find_int_arg(argc, argv, "-iters", 10);
    int octaves = find_int_arg(argc, argv, "-octaves", 4);
    float zoom = find_float_arg(argc, argv, "-zoom", 1.);
    float rate = find_float_arg(argc, argv, "-rate", .04);
    float thresh = find_float_arg(argc, argv, "-thresh", 1.);
    float rotate = find_float_arg(argc, argv, "-rotate", 0);
    float momentum = find_float_arg(argc, argv, "-momentum", .9);
    float lambda = find_float_arg(argc, argv, "-lambda", .01);
    char *prefix = find_char_arg(argc, argv, "-prefix", 0);
    int reconstruct = find_arg(argc, argv, "-reconstruct");
    int smooth_size = find_int_arg(argc, argv, "-smooth", 1);

    network net = parse_network_cfg(cfg);
    load_weights(&net, weights);
    char *cfgbase = basecfg(cfg);
    char *imbase = basecfg(input);

    set_batch_network(&net, 1);
    image im = load_image_color(input, 0, 0);
    if(0){
        float scale = 1;
        if(im.w > 512 || im.h > 512){
            if(im.w > im.h) scale = 512.0/im.w;
            else scale = 512.0/im.h;
        }
        image resized = resize_image(im, scale*im.w, scale*im.h);
        free_image(im);
        im = resized;
    }

    float *features = 0;
    image update;
    if (reconstruct){
        resize_network(&net, im.w, im.h);

        int zz = 0;
        network_predict(net, im.data);
        image out_im = get_network_image(net);
        image crop = crop_image(out_im, zz, zz, out_im.w-2*zz, out_im.h-2*zz);
        //flip_image(crop);
        image f_im = resize_image(crop, out_im.w, out_im.h);
        free_image(crop);
        printf("%d features\n", out_im.w*out_im.h*out_im.c);


        im = resize_image(im, im.w, im.h);
        f_im = resize_image(f_im, f_im.w, f_im.h);
        features = f_im.data;

        int i;
        for(i = 0; i < 14*14*512; ++i){
            features[i] += rand_uniform(-.19, .19);
        }

        free_image(im);
        im = make_random_image(im.w, im.h, im.c);
        update = make_image(im.w, im.h, im.c);

    }

    int e;
    int n;
    for(e = 0; e < rounds; ++e){
        fprintf(stderr, "Iteration: ");
        fflush(stderr);
        for(n = 0; n < iters; ++n){  
            fprintf(stderr, "%d, ", n);
            fflush(stderr);
            if(reconstruct){
                reconstruct_picture(net, features, im, update, rate, momentum, lambda, smooth_size, 1);
                //if ((n+1)%30 == 0) rate *= .5;
                show_image(im, "reconstruction");
#ifdef OPENCV
                cvWaitKey(10);
#endif
            }else{
                int layer = max_layer + rand()%range - range/2;
                int octave = rand()%octaves;
                optimize_picture(&net, im, layer, 1/pow(1.33333333, octave), rate, thresh, norm);
            }
        }
        fprintf(stderr, "done\n");
        if(0){
            image g = grayscale_image(im);
            free_image(im);
            im = g;
        }
        char buff[256];
        if (prefix){
            sprintf(buff, "%s/%s_%s_%d_%06d",prefix, imbase, cfgbase, max_layer, e);
        }else{
            sprintf(buff, "%s_%s_%d_%06d",imbase, cfgbase, max_layer, e);
        }
        printf("%d %s\n", e, buff);
        save_image(im, buff);
        //show_image(im, buff);
        //cvWaitKey(0);

        if(rotate){
            image rot = rotate_image(im, rotate);
            free_image(im);
            im = rot;
        }
        image crop = crop_image(im, im.w * (1. - zoom)/2., im.h * (1.-zoom)/2., im.w*zoom, im.h*zoom);
        image resized = resize_image(crop, im.w, im.h);
        free_image(im);
        free_image(crop);
        im = resized;
    }
}
Example #18
float rand_scale(float s)
{
    float scale = rand_uniform(1, s);
    if(rand()%2) return scale;
    return 1./scale;
}
Example #19
connected_layer make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation, int batch_normalize)
{
    int i;
    connected_layer l;// = {0};
	memset(&l, 0, sizeof(connected_layer));
    l.type = CONNECTED;

    l.inputs = inputs;
    l.outputs = outputs;
    l.batch=batch;
    l.batch_normalize = batch_normalize;
    l.h = 1;
    l.w = 1;
    l.c = inputs;
    l.out_h = 1;
    l.out_w = 1;
    l.out_c = outputs;

    l.output = (float*)calloc(batch*outputs, sizeof(float));
    l.delta = (float*)calloc(batch*outputs, sizeof(float));

    l.weight_updates = (float*)calloc(inputs*outputs, sizeof(float));
    l.bias_updates = (float*)calloc(outputs, sizeof(float));

    l.weights = (float*)calloc(outputs*inputs, sizeof(float));
    l.biases = (float*)calloc(outputs, sizeof(float));

    //float scale = 1./sqrt(inputs);
    float scale = sqrt(2./inputs);
    for(i = 0; i < outputs*inputs; ++i){
        l.weights[i] = scale*rand_uniform(-1, 1);
    }

    for(i = 0; i < outputs; ++i){
        l.biases[i] = 0;
    }

    if(batch_normalize){
        l.scales = (float*)calloc(outputs, sizeof(float));
        l.scale_updates = (float*)calloc(outputs, sizeof(float));
        for(i = 0; i < outputs; ++i){
            l.scales[i] = 1;
        }

        l.mean = (float*)calloc(outputs, sizeof(float));
        l.mean_delta = (float*)calloc(outputs, sizeof(float));
        l.variance =(float*) calloc(outputs, sizeof(float));
        l.variance_delta = (float*)calloc(outputs, sizeof(float));

        l.rolling_mean = (float*)calloc(outputs, sizeof(float));
        l.rolling_variance = (float*)calloc(outputs, sizeof(float));

        l.x = (float*)calloc(batch*outputs, sizeof(float));
        l.x_norm = (float*)calloc(batch*outputs, sizeof(float));
    }

#ifdef GPU
    l.weights_gpu = cuda_make_array(l.weights, outputs*inputs);
    l.biases_gpu = cuda_make_array(l.biases, outputs);

    l.weight_updates_gpu = cuda_make_array(l.weight_updates, outputs*inputs);
    l.bias_updates_gpu = cuda_make_array(l.bias_updates, outputs);

    l.output_gpu = cuda_make_array(l.output, outputs*batch);
    l.delta_gpu = cuda_make_array(l.delta, outputs*batch);
    if(batch_normalize){
        l.scales_gpu = cuda_make_array(l.scales, outputs);
        l.scale_updates_gpu = cuda_make_array(l.scale_updates, outputs);

        l.mean_gpu = cuda_make_array(l.mean, outputs);
        l.variance_gpu = cuda_make_array(l.variance, outputs);

        l.rolling_mean_gpu = cuda_make_array(l.mean, outputs);
        l.rolling_variance_gpu = cuda_make_array(l.variance, outputs);

        l.mean_delta_gpu = cuda_make_array(l.mean, outputs);
        l.variance_delta_gpu = cuda_make_array(l.variance, outputs);

        l.x_gpu = cuda_make_array(l.output, l.batch*outputs);
        l.x_norm_gpu = cuda_make_array(l.output, l.batch*outputs);
    }
#endif
    l.activation = activation;
    fprintf(stderr, "Connected Layer: %d inputs, %d outputs\n", inputs, outputs);
    return l;
}
Example #20
int main( int argc, char* argv[] )
{
    // Dimension of vectors
    // TODO: When I use 1e2, 1e3, 1e4 I get segfaults. Why?
    int n = (int) 1e7;
    int k = 10;
    int nk = n * k;

    // Arbitrary element to evaluate
    float x = 3.14159;

    // Host input vectors
    float *h_coef;
    float *h_controlpts;
    // Host output vector
    float *h_out;

    // Size, in bytes, of each vector
    size_t bytes = nk*sizeof(float);
    size_t outbytes = n*sizeof(float);

    // Allocate memory for each vector on host
    h_coef = (float*)malloc(bytes);
    h_controlpts = (float*)malloc(bytes);
    h_out = (float*)malloc(outbytes);

    // Initialize vectors on host
    int i;
    for( i = 0; i < nk; i++ )
    {
        h_coef[i] = rand_uniform();
        h_controlpts[i] = rand_uniform();
    }

    // Adding this to test for correctness on the first row
    for( i = 0; i < k; i++ )
    {
        h_coef[i] = 1.0;
        h_controlpts[i] = (float) i;
    }


    //------------------------------------------------------------
    // OpenCL starts here

    /* Load kernel source file */
    // From
    // https://www.fixstars.com/en/opencl/book/OpenCLProgrammingBook/calling-the-kernel/
    FILE *fp;
    const char fileName[] = "./tps_kernel.cl";
    size_t source_size;
    char *kernelSource;

    fp = fopen(fileName, "r");
    if (!fp) {
        fprintf(stderr, "Failed to load kernel.\n");
        exit(1);
    }
    kernelSource = (char *)malloc(MAX_SOURCE_SIZE);
    source_size = fread(kernelSource, 1, MAX_SOURCE_SIZE, fp);
    fclose(fp);

    // Device input buffers
    cl_mem d_coef;
    cl_mem d_controlpts;
    // Device output buffer
    cl_mem d_out;

    cl_platform_id cpPlatform;        // OpenCL platform
    cl_device_id device_id;           // device ID
    cl_context context;               // context
    cl_command_queue queue;           // command queue
    cl_program program;               // program
    cl_kernel kernel;                 // kernel

    size_t globalSize, localSize;
    cl_int err;

    // Number of work items in each local work group
    // TODO: What does this do? This may be a bug, go look!
    localSize = 100;

    // Number of total work items - localSize must divide globalSize evenly
    globalSize = ceil(n/(float)localSize)*localSize;

    // Bind to platform
    err = clGetPlatformIDs(1, &cpPlatform, NULL);

    // Get ID for the device
    err = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_GPU, 1, &device_id, NULL);

    // Create a context
    context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);

    // Create a command queue
    queue = clCreateCommandQueue(context, device_id, 0, &err);

    // Create the compute program from the source buffer
    program = clCreateProgramWithSource(context, 1,
                            (const char **) & kernelSource, NULL, &err);

    // Build the program executable
    clBuildProgram(program, 0, NULL, NULL, NULL, NULL);

    // Create the compute kernel in the program we wish to run
    kernel = clCreateKernel(program, "thin_plate_spline", &err);

    // Create the input and output arrays in device memory for our calculation
    d_coef = clCreateBuffer(context, CL_MEM_READ_ONLY, bytes, NULL, NULL);
    d_controlpts = clCreateBuffer(context, CL_MEM_READ_ONLY, bytes, NULL, NULL);
    d_out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, outbytes, NULL, NULL);

	clock_t t0 = clock();

    // Write our data set into the input array in device memory
    err = clEnqueueWriteBuffer(queue, d_coef, CL_TRUE, 0,
                                   bytes, h_coef, 0, NULL, NULL);
    err |= clEnqueueWriteBuffer(queue, d_controlpts, CL_TRUE, 0,
                                   bytes, h_controlpts, 0, NULL, NULL);

    // Set the arguments to our compute kernel
    err  = clSetKernelArg(kernel, 0, sizeof(float), &x);
    err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_coef);
    err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &d_controlpts);
    err |= clSetKernelArg(kernel, 3, sizeof(cl_mem), &d_out);
    err |= clSetKernelArg(kernel, 4, sizeof(int), &k);
    err |= clSetKernelArg(kernel, 5, sizeof(int), &n);

	clock_t t1 = clock();
    // Execute the kernel over the entire range of the data set
    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &globalSize, &localSize,
                                                              0, NULL, NULL);

    // Wait for the command queue to get serviced before reading back results
    clFinish(queue);
	clock_t t2 = clock();

    // Read the results from the device
    clEnqueueReadBuffer(queue, d_out, CL_TRUE, 0,
                                outbytes, h_out, 0, NULL, NULL );
	clock_t t3 = clock();

    printf("Dimensions: n = %i, k = %i\n", n, k);
    printf("\n");
    printf("Transfer input to device (GPU): %f sec\n", time_spent(t0, t1));
    printf("Run compute kernel:             %f sec\n", time_spent(t1, t2));
    printf("Return output to host (CPU):    %f sec\n", time_spent(t2, t3));
    printf("Total:                          %f sec\n", time_spent(t0, t3));
    printf("\n");
    printf("Correct?\n");
    printf("Expected: 143.6215\n");
    printf("Actual: %f\n", h_out[0]);

    // release OpenCL resources
    clReleaseMemObject(d_coef);
    clReleaseMemObject(d_controlpts);
    clReleaseMemObject(d_out);
    clReleaseProgram(program);
    clReleaseKernel(kernel);
    clReleaseCommandQueue(queue);
    clReleaseContext(context);

    //release host memory
    free(h_coef);
    free(h_controlpts);
    //*** glibc detected *** ./tpsCL: free(): invalid pointer: 0x00007fc58431f010 ***
    // Why?
    free(h_out);
    free(kernelSource);

    return 0;
}
Example #21
int rand_int
( int n
)
{ 
  return (int) (n * rand_uniform());
}
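
This cast-based mapping covers {0, ..., n-1} uniformly only if rand_uniform() never returns exactly 1.0; if it can, the cast yields the out-of-range index n. A defensive variant (a sketch, not from the original source):

/* Clamped variant: guards against a generator that can return 1.0. */
int rand_int_safe(int n)
{
  int i = (int) (n * rand_uniform());
  return (i < n) ? i : n - 1;
}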
Example #22
static int lue_havainnot()
        {
        int j;
        int i,h;
        double y;
        int miss;
        int group;
        char *p;

        hav=muste_fopen(tempfile,"wb");
        if (hav==NULL)
            {
            sprintf(sbuf,"\nCannot open file %s for temporary data!",tempfile);
            sur_print(sbuf); WAIT; return(-1);
            }

        sur_print("\nReading observations... ");
        for (i=0; i<m*m; ++i) T[i]=0.0;
        n=0L;
        for (j=d.l1; j<=d.l2; ++j)
            {
            if (unsuitable(&d,j)) continue;
            miss=0;
            for (i=0; i<m; ++i)
                {
                data_load(&d,j,d.v[i],&y);
                if (y==MISSING8) { miss=1; break; }
                xx[i]=y;
                }
            if (miss) continue;

            if (ivar>=0)
                {
                data_load(&d,j,ivar,&y);
                if (y==MISSING8) continue;
                group=y;

                if (group<=0 || group>ng)
                    { ++ivar_init; group=0; }
                }
            else
                group=ng*rand_uniform()+1;

            p=(char *)&group;
            for (h=0; h<sizeof(int); ++h) putc((int)p[h],hav);
            group=0; p=(char *)&group;
            for (i=0; i<n_saved; ++i)
                for (h=0; h<sizeof(int); ++h) putc((int)p[h],hav);

            p=(char *)&j;
            for (h=0; h<sizeof(int); ++h) putc((int)p[h],hav);
            for (i=0; i<m; ++i)
                {
                p=(char *)&xx[i];
                for (h=0; h<sizeof(double); ++h) putc((int)p[h],hav);
                }
            ++n;

            for (i=0; i<m; ++i)
                for (h=0; h<=i; ++h) T[i+m*h]+=xx[i]*xx[h];

            if (sur_kbhit())
                {
                i=sur_getch(); prind=1-prind;
                }
            if (prind) { sprintf(sbuf,"%d ",j); sur_print(sbuf); }
            }

        n_saved_len=n_saved*sizeof(int);
        hav_len=sizeof(int)+n_saved_len+sizeof(int)+m*sizeof(double);
        muste_fclose(hav);

        for (i=0; i<m; ++i)
            for (h=0; h<i; ++h) T[h+m*i]=T[i+m*h];

        for (i=0; i<n_saved; ++i) { lambda2[i]=1e100; freq[i]=0; }

        return(1);
        }
Example #23
	int Load()
	{
		CountdownValue = 3;
		GameRunning = false;

		Cam.Create();

		iteam::Font.push_back(gp2d::Font()); iteam::Font[0].Load("data/gui/ingame/counter.ttf", 64);
		iteam::Font.push_back(gp2d::Font()); iteam::Font[1].Load("data/gui/ingame/eras_bold.TTF", 12);
		iteam::Font.push_back(gp2d::Font()); iteam::Font[2].Load("data/gui/ingame/eras_bold.TTF", 11);

		Audio.push_back(gp2d::Sound());
		Audio.push_back(gp2d::Sound());
		Audio[0].LoadWAV("data/sound/characters/jump.wav");
		Audio[1].LoadWAV("data/sound/interface/clock5.wav");

		Song.push_back(gp2d::Music());
		Song[0].Load("data/music/song1.ogg");
		Song[0].SetLoop(-1);
		//Song[0].Play();

		init_rand();
		cout<<"random seed initialized"<<endl;

		//add players
		for(int i=0;i<2;i++)
		{
			AddPlayer(IT_PLAYER_SUSI,(int)rand_uniform(50,180),50,IT_PLAYER_FACE_RIGHT,1);
			//Change player names to make it easier to differentiate them.
			sprintf(Player[i].Name,"Susi %d",i);
		}

		strcpy(Player[0].Team,"Good");
		strcpy(Player[1].Team,"Evil");

		Tank_base.Load("data/vehicles/tank_base.png", 1, 1);
		Tank_canon.Load("data/vehicles/tank_canon.png", 1, 1);


		Level.push_back(gp2d::Sprite());
		Level.push_back(gp2d::Sprite());
		Level.push_back(gp2d::Sprite());
		Level[0].Load("data/levels/grassymt/terrain.png");
		Level[1].Load("data/levels/grassymt/layer1.png");
		Level[2].Load("data/levels/grassymt/layer2.png");
		Level[0].Move(VIEWPORT_WIDTH/2,VIEWPORT_HEIGHT-Level[0].height[0]/2);
		Level[1].width[0]=2048;
		Level[1].height[0]=1024;
		Level[2].ResizePropW(GameResW);

		InGameGUI.push_back(gp2d::Sprite());
		InGameGUI.push_back(gp2d::Sprite());
		InGameGUI[0].Load("data/gui/ingame/bottom2.png");
		InGameGUI[0].ResizePropW(GameResW);
		InGameGUI[0].Move(VIEWPORT_WIDTH/2,VIEWPORT_HEIGHT-InGameGUI[0].height[0]/2);
		InGameGUI[0].alpha[0]=0.9f;

		InGameGUI[1].Load("data/gui/ingame/countdown_3.png");
		InGameGUI[1].Load("data/gui/ingame/countdown_2.png");
		InGameGUI[1].Load("data/gui/ingame/countdown_1.png");
		InGameGUI[1].Load("data/gui/ingame/countdown_duel.png");
		InGameGUI[1].iterateSheets = true;
		InGameGUI[1].setAnimationSpeed(1.0f);
		InGameGUI[1].animationTimer.Start();

		AnglePointer.Load("data/gui/ingame/angle_pointer.png");
		AnglePointer.Move(iteam::InGameGUI[0].x + 100, iteam::InGameGUI[0].y + 20);

		WeaponSelector.Load("data/weapons/weapon_selector.png");
		WeaponSelector.Scale(38./32.);
		WeaponSelector.Move(THUMBS_WIDTH, 600 - THUMBS_HEIGHT);

		//Test_Init();
		Init_Explosions();
		Weapons_Init();

		return 0; // Load() is declared int; assume 0 signals success
	}
Example #24
Chromosome *LocalSearch::run ()
{
	// Creates a random chromosome
	std::vector <double> genes;
	
	for (unsigned k = 0; k < historicalData->openBugList.size(); k++) 
		genes.push_back (rand_uniform());

	// Calculate cost and chromosome size
	Chromosome *chromosome = new Chromosome (historicalData, genes);
	double costReference = chromosome->getCost();
	int evaluations = 1;

	// Hill Climbing local search
	int size = chromosome->genes.size();
	Chromosome *neighbour = chromosome->clone();

	while (evaluations <= maxEvaluations)
	{
		int i = 0, j;

		while (i < size && evaluations <= maxEvaluations)
		{
			for (j = i + 1; j < size && evaluations <= maxEvaluations; j++)
			{
				neighbour->swapGene(i, j);
				neighbour->recalculateSchedule();
				evaluations++;
				
				if (neighbour->getCost() < costReference) 
				{
					delete chromosome;
					chromosome = neighbour;
					costReference = neighbour->getCost();
					neighbour = neighbour->clone();
					break;
				}
				else
					neighbour->swapGene (j, i);
			}

			i = j;
		}

		// Creates new random chromosome
		genes.clear();

		for (unsigned k = 0; k < historicalData->openBugList.size(); k++) 
			genes.push_back (rand_uniform());

		// Calculate cost and chromosome size
		delete neighbour;
		neighbour = new Chromosome (historicalData, genes);
		evaluations++;

		if (neighbour->getCost() < costReference) 
		{
			delete chromosome;
			chromosome = neighbour;
			costReference = neighbour->getCost();
			neighbour = neighbour->clone();
		}
	}

	delete neighbour; // release the working copy; chromosome always points at a distinct object here
	return chromosome;
}
Example #25
convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation, int batch_normalize, int binary, int xnor)
{
    int i;
    convolutional_layer l = {0};
    l.type = CONVOLUTIONAL;

    l.h = h;
    l.w = w;
    l.c = c;
    l.n = n;
    l.binary = binary;
    l.xnor = xnor;
    l.batch = batch;
    l.stride = stride;
    l.size = size;
    l.pad = pad;
    l.batch_normalize = batch_normalize;

    l.filters = calloc(c*n*size*size, sizeof(float));
    l.filter_updates = calloc(c*n*size*size, sizeof(float));

    l.biases = calloc(n, sizeof(float));
    l.bias_updates = calloc(n, sizeof(float));

    // float scale = 1./sqrt(size*size*c);
    float scale = sqrt(2./(size*size*c));
    for(i = 0; i < c*n*size*size; ++i) l.filters[i] = scale*rand_uniform(-1, 1);
    int out_h = convolutional_out_height(l);
    int out_w = convolutional_out_width(l);
    l.out_h = out_h;
    l.out_w = out_w;
    l.out_c = n;
    l.outputs = l.out_h * l.out_w * l.out_c;
    l.inputs = l.w * l.h * l.c;

    l.output = calloc(l.batch*out_h * out_w * n, sizeof(float));
    l.delta  = calloc(l.batch*out_h * out_w * n, sizeof(float));

    if(binary){
        l.binary_filters = calloc(c*n*size*size, sizeof(float));
        l.cfilters = calloc(c*n*size*size, sizeof(char));
        l.scales = calloc(n, sizeof(float));
    }
    if(xnor){
        l.binary_filters = calloc(c*n*size*size, sizeof(float));
        l.binary_input = calloc(l.inputs*l.batch, sizeof(float));
    }

    if(batch_normalize){
        l.scales = calloc(n, sizeof(float));
        l.scale_updates = calloc(n, sizeof(float));
        for(i = 0; i < n; ++i){
            l.scales[i] = 1;
        }

        l.mean = calloc(n, sizeof(float));
        l.variance = calloc(n, sizeof(float));

        l.rolling_mean = calloc(n, sizeof(float));
        l.rolling_variance = calloc(n, sizeof(float));
    }

#ifdef GPU
    l.filters_gpu = cuda_make_array(l.filters, c*n*size*size);
    l.filter_updates_gpu = cuda_make_array(l.filter_updates, c*n*size*size);

    l.biases_gpu = cuda_make_array(l.biases, n);
    l.bias_updates_gpu = cuda_make_array(l.bias_updates, n);

    l.scales_gpu = cuda_make_array(l.scales, n);
    l.scale_updates_gpu = cuda_make_array(l.scale_updates, n);

    l.delta_gpu = cuda_make_array(l.delta, l.batch*out_h*out_w*n);
    l.output_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);

    if(binary){
        l.binary_filters_gpu = cuda_make_array(l.filters, c*n*size*size);
    }
    if(xnor){
        l.binary_filters_gpu = cuda_make_array(l.filters, c*n*size*size);
        l.binary_input_gpu = cuda_make_array(0, l.inputs*l.batch);
    }

    if(batch_normalize){
        l.mean_gpu = cuda_make_array(l.mean, n);
        l.variance_gpu = cuda_make_array(l.variance, n);

        l.rolling_mean_gpu = cuda_make_array(l.mean, n);
        l.rolling_variance_gpu = cuda_make_array(l.variance, n);

        l.mean_delta_gpu = cuda_make_array(l.mean, n);
        l.variance_delta_gpu = cuda_make_array(l.variance, n);

        l.x_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);
        l.x_norm_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);
    }
#ifdef CUDNN
    cudnnCreateTensorDescriptor(&l.srcTensorDesc);
    cudnnCreateTensorDescriptor(&l.dstTensorDesc);
    cudnnCreateFilterDescriptor(&l.filterDesc);
    cudnnCreateTensorDescriptor(&l.dsrcTensorDesc);
    cudnnCreateTensorDescriptor(&l.ddstTensorDesc);
    cudnnCreateFilterDescriptor(&l.dfilterDesc);
    cudnnCreateConvolutionDescriptor(&l.convDesc);
    cudnnSetTensor4dDescriptor(l.dsrcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l.batch, l.c, l.h, l.w); 
    cudnnSetTensor4dDescriptor(l.ddstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l.batch, l.out_c, l.out_h, l.out_w); 
    cudnnSetFilter4dDescriptor(l.dfilterDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, l.n, l.c, l.size, l.size); 

    cudnnSetTensor4dDescriptor(l.srcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l.batch, l.c, l.h, l.w); 
    cudnnSetTensor4dDescriptor(l.dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l.batch, l.out_c, l.out_h, l.out_w); 
    cudnnSetFilter4dDescriptor(l.filterDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, l.n, l.c, l.size, l.size); 
    int padding = l.pad ? l.size/2 : 0;
    cudnnSetConvolution2dDescriptor(l.convDesc, padding, padding, l.stride, l.stride, 1, 1, CUDNN_CROSS_CORRELATION);
    cudnnGetConvolutionForwardAlgorithm(cudnn_handle(),
            l.srcTensorDesc,
            l.filterDesc,
            l.convDesc,
            l.dstTensorDesc,
            CUDNN_CONVOLUTION_FWD_PREFER_FASTEST,
            0,
            &l.fw_algo);
    cudnnGetConvolutionBackwardDataAlgorithm(cudnn_handle(),
            l.filterDesc,
            l.ddstTensorDesc,
            l.convDesc,
            l.dsrcTensorDesc,
            CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST,
            0,
            &l.bd_algo);
    cudnnGetConvolutionBackwardFilterAlgorithm(cudnn_handle(),
            l.srcTensorDesc,
            l.ddstTensorDesc,
            l.convDesc,
            l.dfilterDesc,
            CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST,
            0,
            &l.bf_algo);
#endif
#endif
    l.workspace_size = get_workspace_size(l);
    l.activation = activation;

    fprintf(stderr, "Convolutional Layer: %d x %d x %d image, %d filters -> %d x %d x %d image\n", h,w,c,n, out_h, out_w, n);

    return l;
}
Example #26
convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam)
{
    int i;
    convolutional_layer l = {0};
    l.type = CONVOLUTIONAL;

    l.h = h;
    l.w = w;
    l.c = c;
    l.n = n;
    l.binary = binary;
    l.xnor = xnor;
    l.batch = batch;
    l.stride = stride;
    l.size = size;
    l.pad = padding;
    l.batch_normalize = batch_normalize;

    l.weights = calloc(c*n*size*size, sizeof(float));
    l.weight_updates = calloc(c*n*size*size, sizeof(float));

    l.biases = calloc(n, sizeof(float));
    l.bias_updates = calloc(n, sizeof(float));

    // float scale = 1./sqrt(size*size*c);
    float scale = sqrt(2./(size*size*c));
    for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_uniform(-1, 1);
    int out_h = convolutional_out_height(l);
    int out_w = convolutional_out_width(l);
    l.out_h = out_h;
    l.out_w = out_w;
    l.out_c = n;
    l.outputs = l.out_h * l.out_w * l.out_c;
    l.inputs = l.w * l.h * l.c;

    l.output = calloc(l.batch*l.outputs, sizeof(float));
    l.delta  = calloc(l.batch*l.outputs, sizeof(float));

    l.forward = forward_convolutional_layer;
    l.backward = backward_convolutional_layer;
    l.update = update_convolutional_layer;
    if(binary){
        l.binary_weights = calloc(c*n*size*size, sizeof(float));
        l.cweights = calloc(c*n*size*size, sizeof(char));
        l.scales = calloc(n, sizeof(float));
    }
    if(xnor){
        l.binary_weights = calloc(c*n*size*size, sizeof(float));
        l.binary_input = calloc(l.inputs*l.batch, sizeof(float));
    }

    if(batch_normalize){
        l.scales = calloc(n, sizeof(float));
        l.scale_updates = calloc(n, sizeof(float));
        for(i = 0; i < n; ++i){
            l.scales[i] = 1;
        }

        l.mean = calloc(n, sizeof(float));
        l.variance = calloc(n, sizeof(float));

        l.mean_delta = calloc(n, sizeof(float));
        l.variance_delta = calloc(n, sizeof(float));

        l.rolling_mean = calloc(n, sizeof(float));
        l.rolling_variance = calloc(n, sizeof(float));
        l.x = calloc(l.batch*l.outputs, sizeof(float));
        l.x_norm = calloc(l.batch*l.outputs, sizeof(float));
    }
    if(adam){
        l.adam = 1;
        l.m = calloc(c*n*size*size, sizeof(float));
        l.v = calloc(c*n*size*size, sizeof(float));
    }

#ifdef GPU
    l.forward_gpu = forward_convolutional_layer_gpu;
    l.backward_gpu = backward_convolutional_layer_gpu;
    l.update_gpu = update_convolutional_layer_gpu;

    if(gpu_index >= 0){
        if (adam) {
            l.m_gpu = cuda_make_array(l.m, c*n*size*size);
            l.v_gpu = cuda_make_array(l.v, c*n*size*size);
        }

        l.weights_gpu = cuda_make_array(l.weights, c*n*size*size);
        l.weight_updates_gpu = cuda_make_array(l.weight_updates, c*n*size*size);

        l.biases_gpu = cuda_make_array(l.biases, n);
        l.bias_updates_gpu = cuda_make_array(l.bias_updates, n);

        l.delta_gpu = cuda_make_array(l.delta, l.batch*out_h*out_w*n);
        l.output_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);

        if(binary){
            l.binary_weights_gpu = cuda_make_array(l.weights, c*n*size*size);
        }
        if(xnor){
            l.binary_weights_gpu = cuda_make_array(l.weights, c*n*size*size);
            l.binary_input_gpu = cuda_make_array(0, l.inputs*l.batch);
        }

        if(batch_normalize){
            l.mean_gpu = cuda_make_array(l.mean, n);
            l.variance_gpu = cuda_make_array(l.variance, n);

            l.rolling_mean_gpu = cuda_make_array(l.mean, n);
            l.rolling_variance_gpu = cuda_make_array(l.variance, n);

            l.mean_delta_gpu = cuda_make_array(l.mean, n);
            l.variance_delta_gpu = cuda_make_array(l.variance, n);

            l.scales_gpu = cuda_make_array(l.scales, n);
            l.scale_updates_gpu = cuda_make_array(l.scale_updates, n);

            l.x_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);
            l.x_norm_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);
        }
#ifdef CUDNN
        cudnnCreateTensorDescriptor(&l.srcTensorDesc);
        cudnnCreateTensorDescriptor(&l.dstTensorDesc);
        cudnnCreateFilterDescriptor(&l.weightDesc);
        cudnnCreateTensorDescriptor(&l.dsrcTensorDesc);
        cudnnCreateTensorDescriptor(&l.ddstTensorDesc);
        cudnnCreateFilterDescriptor(&l.dweightDesc);
        cudnnCreateConvolutionDescriptor(&l.convDesc);
        cudnn_convolutional_setup(&l);
#endif
    }
#endif
    l.workspace_size = get_workspace_size(l);
    l.activation = activation;

    fprintf(stderr, "conv  %5d %2d x%2d /%2d  %4d x%4d x%4d   ->  %4d x%4d x%4d\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c);

    return l;
}