/*
 * Pick an index at random with probability proportional to its weight.
 *
 *   p  Array of n non-negative weights; they need not sum to one.
 *   n  Number of entries in p.
 *
 * Returns an index in 0..n-1.  Calls abort() if any weight is negative or
 * if the weights do not add up to a positive total.
 */
int rand_pickf ( float *p, int n )
{
  double t, r;
  int i;

  /* Total the weights.  A weight of exactly zero is legal (it contributes
     nothing to the total); only negative weights are rejected. */
  t = 0;
  for (i = 0; i<n; i++) {
    if (p[i]<0) abort();
    t += p[i];
  }

  if (t<=0) abort();

  /* Walk the cumulative weights until the random point is passed. */
  r = t * rand_uniform();

  for (i = 0; i<n; i++) {
    r -= p[i];
    if (r<0) return i;
  }

  /* Return value with non-zero probability if we get here due to roundoff. */
  for (i = 0; i<n; i++) {
    if (p[i]>0) return i;
  }

  abort();
}
/*
 * Compute the learning rate to use for the current batch.
 *
 * While the batch counter is below net.burn_in the rate is ramped as
 * learning_rate * (batch / burn_in)^power; afterwards it follows the
 * schedule selected by net.policy.  An unknown policy logs a message to
 * stderr and falls back to the base learning rate.
 */
float get_current_rate(network net)
{
    size_t batch_num = get_current_batch(net);
    int i;
    float rate;

    if (batch_num < net.burn_in) return net.learning_rate * pow((float)batch_num / net.burn_in, net.power);

    switch (net.policy) {
        case CONSTANT:
            return net.learning_rate;
        case STEP:
            /* Integer division is intentional: the exponent only changes every net.step batches. */
            return net.learning_rate * pow(net.scale, batch_num/net.step);
        case STEPS:
            /* Apply every scale whose step threshold has already been reached. */
            rate = net.learning_rate;
            for(i = 0; i < net.num_steps; ++i){
                if(net.steps[i] > batch_num) return rate;
                rate *= net.scales[i];
            }
            return rate;
        case EXP:
            return net.learning_rate * pow(net.gamma, batch_num);
        case POLY:
            return net.learning_rate * pow(1 - (float)batch_num / net.max_batches, net.power);
        case RANDOM:
            return net.learning_rate * pow(rand_uniform(0,1), net.power);
        case SIG: {
            /* batch_num is unsigned, so subtracting the step directly would
               wrap around to a huge value whenever batch_num < net.step.
               Form the (possibly negative) offset in floating point instead. */
            float offset = (float)((double)batch_num - (double)net.step);
            return net.learning_rate * (1./(1.+exp(net.gamma*offset)));
        }
        default:
            fprintf(stderr, "Policy is weird!\n");
            return net.learning_rate;
    }
}
/*
 * Restart the simplex around its current best vertex.
 *
 * The best vertex (as reported by eval_vertices) is copied into slot 0,
 * every step size is multiplied by a fixed factor, the remaining vertices
 * are redrawn uniformly within +/- step_size of vertex 0, and the response
 * of every vertex is recomputed with calc_error.
 */
void restart (float ** simplex, float * response, float * step_size)
{
  const float STEP_FACTOR = 0.9;
  int worst, next, best;
  int vertex, dim;
  float lower, upper;

  /* locate the best vertex */
  eval_vertices (response, &worst, &next, &best);

  /* vertex 0 becomes a copy of the best vertex */
  for (dim = 0;  dim < DIMENSION;  dim++)
    {
      simplex[0][dim] = simplex[best][dim];
    }

  /* shrink the step in every dimension */
  for (dim = 0;  dim < DIMENSION;  dim++)
    {
      step_size[dim] *= STEP_FACTOR;
    }

  /* redraw the other vertices around vertex 0 */
  for (vertex = 1;  vertex < DIMENSION+1;  vertex++)
    {
      for (dim = 0;  dim < DIMENSION;  dim++)
        {
          lower = simplex[0][dim] - step_size[dim];
          upper = simplex[0][dim] + step_size[dim];
          simplex[vertex][dim] = rand_uniform (lower, upper);
        }
    }

  /* recompute the response at every vertex */
  for (vertex = 0;  vertex < DIMENSION+1;  vertex++)
    {
      response[vertex] = calc_error (simplex[vertex]);
    }
}
/*
 * Build a locally connected layer.
 *
 * batch, h, w, c  - batch size and input height / width / channels
 * n               - number of filters (also the number of output channels)
 * size, stride, pad - filter geometry, stored on the layer and used by
 *                   local_out_height / local_out_width
 * activation      - activation stored on the layer
 *
 * The layer keeps a separate filter bank for every output location, so the
 * weight buffers hold c*n*size*size*locations floats.  All host buffers are
 * allocated with calloc and, when GPU is defined, mirrored with
 * cuda_make_array.  A one-line summary is printed to stderr.
 */
local_layer make_local_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation)
{
    int i;
    local_layer l = { 0 };
    l.type = LOCAL;

    l.h = h;
    l.w = w;
    l.c = c;
    l.n = n;
    l.batch = batch;
    l.stride = stride;
    l.size = size;
    l.pad = pad;

    int out_h = local_out_height(l);
    int out_w = local_out_width(l);
    int locations = out_h * out_w;
    int weight_count = c * n * size * size * locations;
    l.out_h = out_h;
    l.out_w = out_w;
    l.out_c = n;
    l.outputs = l.out_h * l.out_w * l.out_c;
    l.inputs = l.w * l.h * l.c;

    l.weights = calloc(weight_count, sizeof(float));
    l.weight_updates = calloc(weight_count, sizeof(float));

    l.biases = calloc(l.outputs, sizeof(float));
    l.bias_updates = calloc(l.outputs, sizeof(float));

    /* Randomize the filters of EVERY location.  Stopping at c*n*size*size
       would initialize only the first location's filter bank and leave all
       remaining weights at zero. */
    float scale = sqrt(2. / (size * size * c));
    for (i = 0; i < weight_count; ++i) l.weights[i] = scale * rand_uniform(-1, 1);

    l.col_image = calloc(out_h * out_w * size * size * c, sizeof(float));
    l.output = calloc(l.batch * out_h * out_w * n, sizeof(float));
    l.delta = calloc(l.batch * out_h * out_w * n, sizeof(float));

#ifdef GPU
    l.weights_gpu = cuda_make_array(l.weights, weight_count);
    l.weight_updates_gpu = cuda_make_array(l.weight_updates, weight_count);

    l.biases_gpu = cuda_make_array(l.biases, l.outputs);
    l.bias_updates_gpu = cuda_make_array(l.bias_updates, l.outputs);

    l.col_image_gpu = cuda_make_array(l.col_image, out_h*out_w*size*size*c);
    l.delta_gpu = cuda_make_array(l.delta, l.batch*out_h*out_w*n);
    l.output_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);
#endif
    l.activation = activation;

    fprintf(stderr, "Local Layer: %d x %d x %d image, %d filters -> %d x %d x %d image\n", h, w, c, n, out_h, out_w, n);

    return l;
}
/*
 * Draw one value by combining two uniform draws: the cosine of a random
 * angle (from rand_uniform) times a radius derived from the logarithm of
 * a second draw (from rand_uniopen).
 */
double rand_gaussian (void)
{
  double angle_draw = rand_uniform();
  double radius_draw = rand_uniopen();
  double direction = cos(2.0*M_PI*angle_draw);
  double radius = sqrt(-2.0*log(radius_draw));

  return direction * radius;
}
/*
 * Build a convolutional layer.
 *
 * Stores the input geometry and filter parameters on the layer, allocates
 * zeroed host buffers for filters, biases, their updates, the column image,
 * the output and the delta, and (when GPU is defined) mirrors them with
 * cuda_make_array.  Filters are drawn from [-scale, scale) using
 * rand_uniform() and every bias is set to scale, where
 * scale = sqrt(2 / (size*size*c)).  The scale is printed to stdout and a
 * summary line to stderr.
 */
convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation)
{
    convolutional_layer l = {0};
    int filter_count = c*n*size*size;
    int idx;

    l.type = CONVOLUTIONAL;

    /* input geometry and filter parameters */
    l.h = h;
    l.w = w;
    l.c = c;
    l.n = n;
    l.batch = batch;
    l.stride = stride;
    l.size = size;
    l.pad = pad;

    /* parameter buffers */
    l.filters = calloc(filter_count, sizeof(float));
    l.filter_updates = calloc(filter_count, sizeof(float));
    l.biases = calloc(n, sizeof(float));
    l.bias_updates = calloc(n, sizeof(float));

    float scale = sqrt(2./(size*size*c));
    printf("%f\n", scale);

    idx = 0;
    while (idx < filter_count) {
        l.filters[idx] = 2*scale*rand_uniform() - scale;
        ++idx;
    }
    idx = 0;
    while (idx < n) {
        l.biases[idx] = scale;
        ++idx;
    }

    /* output geometry */
    int out_h = convolutional_out_height(l);
    int out_w = convolutional_out_width(l);
    l.out_h = out_h;
    l.out_w = out_w;
    l.out_c = n;
    l.outputs = l.out_h * l.out_w * l.out_c;
    l.inputs = l.w * l.h * l.c;

    /* work buffers */
    l.col_image = calloc(out_h*out_w*size*size*c, sizeof(float));
    l.output = calloc(l.batch*out_h * out_w * n, sizeof(float));
    l.delta = calloc(l.batch*out_h * out_w * n, sizeof(float));

#ifdef GPU
    l.filters_gpu = cuda_make_array(l.filters, filter_count);
    l.filter_updates_gpu = cuda_make_array(l.filter_updates, filter_count);

    l.biases_gpu = cuda_make_array(l.biases, n);
    l.bias_updates_gpu = cuda_make_array(l.bias_updates, n);

    l.col_image_gpu = cuda_make_array(l.col_image, out_h*out_w*size*size*c);
    l.delta_gpu = cuda_make_array(l.delta, l.batch*out_h*out_w*n);
    l.output_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);
#endif
    l.activation = activation;

    fprintf(stderr, "Convolutional Layer: %d x %d x %d image, %d filters -> %d x %d x %d image\n", h,w,c,n, out_h, out_w, n);

    return l;
}
/*
 * Apply dropout in place to state.input.
 *
 * Does nothing unless state.train is set.  For each of the
 * l.batch * l.inputs elements a draw from rand_uniform() is stored in
 * l.rand; elements whose draw is below l.probability are zeroed and the
 * rest are multiplied by l.scale.
 */
void forward_dropout_layer(dropout_layer l, network_state state)
{
    if (state.train) {
        int total = l.batch * l.inputs;
        int idx;

        for (idx = 0; idx < total; ++idx) {
            float draw = rand_uniform();

            l.rand[idx] = draw;
            if (draw < l.probability) {
                state.input[idx] = 0;
            } else {
                state.input[idx] *= l.scale;
            }
        }
    }
}
/*
 * Set up the initial simplex.
 *
 * Vertex 0 is the supplied parameter vector and each step size is half the
 * corresponding parameter.  The other vertices are drawn uniformly within
 * +/- step_size of vertex 0 and every vertex is scored with calc_error.
 * A further 499 random candidates are then drawn the same way (reusing
 * `parameters` as scratch space); each candidate that scores below the
 * current worst vertex replaces it.
 */
void simplex_initialize (float * parameters, float ** simplex,
                         float * response, float * step_size)
{
  int worst, next, best;
  int vertex, dim, trial;
  float lower, upper;
  float candidate_resp;

  /* vertex 0 and the initial step sizes */
  for (dim = 0;  dim < DIMENSION;  dim++)
    {
      simplex[0][dim] = parameters[dim];
      step_size[dim] = 0.5 * parameters[dim];
    }

  /* remaining vertices: random points around vertex 0 */
  for (vertex = 1;  vertex < DIMENSION+1;  vertex++)
    {
      for (dim = 0;  dim < DIMENSION;  dim++)
        {
          lower = simplex[0][dim] - step_size[dim];
          upper = simplex[0][dim] + step_size[dim];
          simplex[vertex][dim] = rand_uniform (lower, upper);
        }
    }

  /* score every vertex */
  for (vertex = 0;  vertex < DIMENSION+1;  vertex++)
    {
      response[vertex] = calc_error (simplex[vertex]);
    }

  /* random search: keep any candidate that beats the worst vertex */
  for (trial = 1;  trial < 500;  trial++)
    {
      for (dim = 0;  dim < DIMENSION;  dim++)
        {
          lower = simplex[0][dim] - step_size[dim];
          upper = simplex[0][dim] + step_size[dim];
          parameters[dim] = rand_uniform (lower, upper);
        }

      candidate_resp = calc_error (parameters);
      eval_vertices (response, &worst, &next, &best);

      if (candidate_resp < response[worst])
        {
          replace (simplex, response, worst, parameters, candidate_resp);
        }
    }
}
/*
 * Sample an index from the array `a` of `n` weights.
 *
 * The array is rescaled in place by 1/sum (via sum_array / scale_array),
 * then a draw from rand_uniform(0, 1) is walked down the entries until it
 * reaches zero or below.  Falls back to the last index if the walk never
 * gets there.
 */
int sample_array(float *a, int n)
{
    int idx;
    float total = sum_array(a, n);
    float remaining;

    scale_array(a, n, 1./total);
    remaining = rand_uniform(0, 1);

    idx = 0;
    while (idx < n) {
        remaining = remaining - a[idx];
        if (remaining <= 0) {
            return idx;
        }
        ++idx;
    }
    return n-1;
}
/*
 * Assign a fresh random group number to every stored observation.
 *
 * Reopens the temporary data file for update and, for each of the n
 * observations, writes a group drawn as ng*rand_uniform()+1 through
 * hav_write1.
 *
 * Returns 1 on success, or -1 if the temporary file cannot be opened
 * (the same convention lue_havainnot uses for that failure).
 */
static int init_gr()
{
    int jj;
    int group;

    hav=muste_fopen(tempfile,"r+b");
    /* Without this check a failed open would be used and closed as if valid. */
    if (hav==NULL) return(-1);

    for (jj=0L; jj<n; ++jj) {
        group=ng*rand_uniform()+1;
        hav_write1(jj,&group);
    }
    muste_fclose(hav);
    return(1);
}
/*
 * Average prob_of_transits_approx over NTRIALS random draws.
 *
 * In every trial each of the n_planets entries of `io` gets a new
 * inclination (rand_Rayleigh(sigma_i) / RAD_TO_DEG) and a new Omega
 * (rand_uniform(2 * PI)); the entry is converted to an orbit marked as
 * used and then to a planet_ellipse.  The entries of `io` are modified
 * in place.
 * NOTE(review): n_planets is not checked against MAX_PLANETS here.
 */
double prob(int n_planets, input_orbit* io, double sigma_i)
{
    orbit o[MAX_PLANETS];
    planet_ellipse pe[MAX_PLANETS];
    double total = 0;
    int trial;
    int p;

    for (trial = 0; trial < NTRIALS; trial++) {
        for (p = 0; p < n_planets; p++) {
            input_orbit *src = &io[p];

            src->i = rand_Rayleigh (sigma_i) / RAD_TO_DEG;
            src->Omega = rand_uniform (2 * PI);

            o[p] = input_orbit_to_orbit (*src);
            o[p].use = 1;
            pe[p] = convert (o[p]);
        }
        /* original author's note: as e = 0 for all planets, values are EXACT */
        total += prob_of_transits_approx (n_planets, pe);
    }

    return total / NTRIALS;
}
/*
 * Build a fully connected layer.
 *
 * batch      - number of samples per batch
 * inputs     - number of inputs per sample
 * outputs    - number of outputs per sample
 * activation - activation stored on the layer
 *
 * Allocates zeroed host buffers for outputs, deltas, weights, biases and
 * their updates.  Weights are set to scale * rand_uniform(-1, 1) and biases
 * to scale, with scale = sqrt(2 / inputs).  When GPU is defined the buffers
 * are mirrored with cuda_make_array.  A summary line is printed to stderr.
 */
connected_layer make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation)
{
    int i;
    connected_layer l = {0};
    l.type = CONNECTED;

    l.inputs = inputs;
    l.outputs = outputs;
    l.batch=batch;

    /* These buffers hold floats, not pointers: size the elements as float
       (sizeof(float*) over-allocates on typical 64-bit targets). */
    l.output = calloc(batch*outputs, sizeof(float));
    l.delta = calloc(batch*outputs, sizeof(float));

    l.weight_updates = calloc(inputs*outputs, sizeof(float));
    l.bias_updates = calloc(outputs, sizeof(float));

    l.weights = calloc(outputs*inputs, sizeof(float));
    l.biases = calloc(outputs, sizeof(float));

    float scale = sqrt(2./inputs);
    for(i = 0; i < outputs*inputs; ++i){
        l.weights[i] = scale*rand_uniform(-1, 1);
    }

    for(i = 0; i < outputs; ++i){
        l.biases[i] = scale;
    }

#ifdef GPU
    l.weights_gpu = cuda_make_array(l.weights, outputs*inputs);
    l.biases_gpu = cuda_make_array(l.biases, outputs);

    l.weight_updates_gpu = cuda_make_array(l.weight_updates, outputs*inputs);
    l.bias_updates_gpu = cuda_make_array(l.bias_updates, outputs);

    l.output_gpu = cuda_make_array(l.output, outputs*batch);
    l.delta_gpu = cuda_make_array(l.delta, outputs*batch);
#endif
    l.activation = activation;
    fprintf(stderr, "Connected Layer: %d inputs, %d outputs\n", inputs, outputs);
    return l;
}
double prob(double sigma_i) { double sum = 0; for (int i = 0; i < NTRIALS; i++) { // for (int k = 0; k < n_planets; k++) // { for (int j = 0; j < n_planets; j++) { IO90[j].i = rand_Rayleigh (sigma_i) / RAD_TO_DEG; IO90[j].Omega = rand_uniform (2 * PI); O90[j] = input_orbit_to_orbit (IO90[j]); O90[j].use = use[j]; // (int) (k != j); planets90[j] = convert (O90[j]); } // as e = 0 for all planets, values are EXACT sum += prob_of_transits_approx (n_planets, planets90); // } } return sum / NTRIALS; }
/*
 * Generate text from a character-level network.
 *
 * cfgfile    - network configuration to parse
 * weightfile - optional weights to load (skipped when NULL)
 * num        - number of characters to generate after the seed
 * seed       - non-empty text used to prime the network
 * temp       - temperature written to every layer
 * rseed      - value passed to srand()
 *
 * All but the last seed character are fed through the network and echoed.
 * Then, num times, the current character is printed, fed to the network,
 * and the next character is sampled from the output by walking its
 * cumulative sum past a draw from rand_uniform(0,1).
 */
void test_char_rnn(char *cfgfile, char *weightfile, int num, char *seed, float temp, int rseed)
{
    srand(rseed);
    char *base = basecfg(cfgfile);
    fprintf(stderr, "%s\n", base);

    network net = parse_network_cfg(cfgfile);
    if(weightfile){
        load_weights(&net, weightfile);
    }
    int inputs = get_network_input_size(net);

    int i, j;
    for(i = 0; i < net.n; ++i) net.layers[i].temperature = temp;

    unsigned char c;
    int len = strlen(seed);
    /* An empty seed would make seed[len-1] read before the string. */
    if(len == 0){
        fprintf(stderr, "Seed text must not be empty\n");
        return;
    }

    float *input = calloc(inputs, sizeof(float));
    if(!input){
        fprintf(stderr, "Out of memory\n");
        return;
    }

    /* Prime the network with every seed character except the last. */
    for(i = 0; i < len-1; ++i){
        c = seed[i];
        input[(int)c] = 1;
        network_predict(net, input);
        input[(int)c] = 0;
        printf("%c", c);
    }

    c = seed[len-1];
    for(i = 0; i < num; ++i){
        printf("%c", c);
        float r = rand_uniform(0,1);
        float sum = 0;
        input[(int)c] = 1;
        float *out = network_predict(net, input);
        input[(int)c] = 0;
        for(j = 0; j < inputs; ++j){
            sum += out[j];
            if(sum > r) break;
        }
        /* If the cumulative sum never exceeds r (roundoff), the loop ends
           with j == inputs; fall back to the last valid index instead of
           using an out-of-range character. */
        if(j >= inputs) j = inputs - 1;
        c = j;
    }
    printf("\n");

    free(input);
}
// Returns a Gaussian-distributed random number using the Box-Muller method: // http://en.wikipedia.org/wiki/Box_muller#Basic_form (note: second value discarded) double rand_gaussian() { double u1 = rand_uniform(), u2 = rand_uniform(); return sqrt(-2*log(u1))*cos(2*M_PI*u2); }
void test_recursive_splitting_degenerate_perturbed() {

    // Recursively splits a polyhedron (starting from a tet) with a cut plane
    // that is always degenerate with the tet in some way,
    // checking to see that the resulting volumes add up properly.
    // In this one, the cut plane is perturbed by a small amount.
    //
    // For each of NUM_TRIALS random tets, pieces are kept on an explicit stack;
    // every popped piece is clipped against a plane and against the same plane
    // flipped, and the moments of the two halves are compared with the moments
    // of the unsplit piece.

    // Range of decimal exponents used for the relative perturbation.
    // NOTE(review): the modulus below is MAX - MIN, so for non-negative t the
    // exponent runs from MIN_PERTURB_ORDER up to MAX_PERTURB_ORDER - 1.
    // These macros are not #undef'd at the end of the function.
#define MIN_PERTURB_ORDER (-17)
#define MAX_PERTURB_ORDER (-1)

    // explicit stack-based implementation
    r3d_int nstack, depth, t, chopt, m;
    r3d_poly polystack[STACK_SIZE];
    r3d_int depthstack[STACK_SIZE];

    // variables: the polyhedra and their moments
    r3d_rvec3 verts[4];
    r3d_plane splane;
    r3d_poly opoly, poly1, poly2;
    r3d_real om[R3D_NUM_MOMENTS(POLY_ORDER)], m1[R3D_NUM_MOMENTS(POLY_ORDER)], m2[R3D_NUM_MOMENTS(POLY_ORDER)];
    r3d_real perturb;

    // do many trials
    printf("Recursively splitting %d tetrahedra, maximum splits per tet is %d.\n", NUM_TRIALS, MAX_DEPTH);
    for(t = 0; t < NUM_TRIALS; ++t) {

        // compute the order of magnitude by which to perturb the clip plane,
        // determined by the trial number
        perturb = pow(10, MIN_PERTURB_ORDER + t%(MAX_PERTURB_ORDER - MIN_PERTURB_ORDER));
        //printf("omag = %d, pow = %.10e\n", MIN_PERTURB_ORDER + t%(MAX_PERTURB_ORDER - MIN_PERTURB_ORDER), perturb);

        // generate a random tet
        rand_tet_3d(verts, MIN_VOL);
        r3d_init_tet(&opoly, verts);

        // push the starting tet to the stack
        nstack = 0;
        polystack[nstack] = opoly;
        depthstack[nstack] = 0;
        ++nstack;

        // recursively split the poly
        // NOTE(review): nothing here checks nstack against STACK_SIZE before pushing.
        while(nstack > 0) {

            // pop the stack
            --nstack;
            opoly = polystack[nstack];
            depth = depthstack[nstack];

            // generate a random plane from one of a few
            // possible degenerate configurations, ensuring that it
            // has a valid unit normal
            // (chopt is drawn once; the same generator is re-invoked until the
            // normal it returns is not all zeros)
            chopt = rand_int(6);
            do {
                splane = choptions_3d[chopt](&opoly);
            } while(splane.n.x == 0.0 && splane.n.y == 0.0 && splane.n.z == 0.0);

            // randomly perturb the plane: each component is scaled by a factor
            // of the form 1 + perturb*(u - 0.5), u drawn from rand_uniform()
            splane.n.x *= 1.0 + perturb*(rand_uniform() - 0.5);
            splane.n.y *= 1.0 + perturb*(rand_uniform() - 0.5);
            splane.n.z *= 1.0 + perturb*(rand_uniform() - 0.5);
            splane.d *= 1.0 + perturb*(rand_uniform() - 0.5);

            // split the poly by making two copies of the original poly
            // and then clipping them against the same plane, with one
            // oriented oppositely
            poly1 = opoly;
            poly2 = opoly;
            r3d_clip(&poly1, &splane, 1);
            splane.n.x *= -1;
            splane.n.y *= -1;
            splane.n.z *= -1;
            splane.d *= -1;
            r3d_clip(&poly2, &splane, 1);

            // reduce the original and its two parts
            r3d_reduce(&opoly, om, POLY_ORDER);
            r3d_reduce(&poly1, m1, POLY_ORDER);
            r3d_reduce(&poly2, m2, POLY_ORDER);

            // make sure the sum of moments equals the original
            for(m = 0; m < R3D_NUM_MOMENTS(POLY_ORDER); ++m) {
                ASSERT_EQ(om[m], m1[m] + m2[m], TOL_FAIL);
                EXPECT_EQ(om[m], m1[m] + m2[m], TOL_WARN);
            }

            // make sure neither of the two resulting volumes is larger than the original
            // (within some tolerance)
            ASSERT_LT(m1[0], om[0]*(1.0 + TOL_FAIL));
            EXPECT_LT(m1[0], om[0]*(1.0 + TOL_WARN));
            ASSERT_LT(m2[0], om[0]*(1.0 + TOL_FAIL));
            EXPECT_LT(m2[0], om[0]*(1.0 + TOL_WARN));

            //printf("nstack = %d, depth = %d, opoly = %.10e, p1 = %.10e, p2 = %.10e, err = %.10e\n",
            //nstack, depth, om[0], m1[0], m2[0], fabs(1.0 - om[0]/(m1[0] + m2[0])));

            // push the children to the stack if they have
            // an acceptably large volume
            if(depth < MAX_DEPTH) {
                if(m1[0] > MIN_VOL) {
                    polystack[nstack] = poly1;
                    depthstack[nstack] = depth + 1;
                    ++nstack;
                }
                if(m2[0] > MIN_VOL) {
                    polystack[nstack] = poly2;
                    depthstack[nstack] = depth + 1;
                    ++nstack;
                }
            }
        }
    }
}
void run_nightmare(int argc, char **argv) { srand(0); if(argc < 4){ fprintf(stderr, "usage: %s %s [cfg] [weights] [image] [layer] [options! (optional)]\n", argv[0], argv[1]); return; } char *cfg = argv[2]; char *weights = argv[3]; char *input = argv[4]; int max_layer = atoi(argv[5]); int range = find_int_arg(argc, argv, "-range", 1); int norm = find_int_arg(argc, argv, "-norm", 1); int rounds = find_int_arg(argc, argv, "-rounds", 1); int iters = find_int_arg(argc, argv, "-iters", 10); int octaves = find_int_arg(argc, argv, "-octaves", 4); float zoom = find_float_arg(argc, argv, "-zoom", 1.); float rate = find_float_arg(argc, argv, "-rate", .04); float thresh = find_float_arg(argc, argv, "-thresh", 1.); float rotate = find_float_arg(argc, argv, "-rotate", 0); float momentum = find_float_arg(argc, argv, "-momentum", .9); float lambda = find_float_arg(argc, argv, "-lambda", .01); char *prefix = find_char_arg(argc, argv, "-prefix", 0); int reconstruct = find_arg(argc, argv, "-reconstruct"); int smooth_size = find_int_arg(argc, argv, "-smooth", 1); network net = parse_network_cfg(cfg); load_weights(&net, weights); char *cfgbase = basecfg(cfg); char *imbase = basecfg(input); set_batch_network(&net, 1); image im = load_image_color(input, 0, 0); if(0){ float scale = 1; if(im.w > 512 || im.h > 512){ if(im.w > im.h) scale = 512.0/im.w; else scale = 512.0/im.h; } image resized = resize_image(im, scale*im.w, scale*im.h); free_image(im); im = resized; } float *features = 0; image update; if (reconstruct){ resize_network(&net, im.w, im.h); int zz = 0; network_predict(net, im.data); image out_im = get_network_image(net); image crop = crop_image(out_im, zz, zz, out_im.w-2*zz, out_im.h-2*zz); //flip_image(crop); image f_im = resize_image(crop, out_im.w, out_im.h); free_image(crop); printf("%d features\n", out_im.w*out_im.h*out_im.c); im = resize_image(im, im.w, im.h); f_im = resize_image(f_im, f_im.w, f_im.h); features = f_im.data; int i; for(i = 0; i < 14*14*512; ++i){ features[i] 
+= rand_uniform(-.19, .19); } free_image(im); im = make_random_image(im.w, im.h, im.c); update = make_image(im.w, im.h, im.c); } int e; int n; for(e = 0; e < rounds; ++e){ fprintf(stderr, "Iteration: "); fflush(stderr); for(n = 0; n < iters; ++n){ fprintf(stderr, "%d, ", n); fflush(stderr); if(reconstruct){ reconstruct_picture(net, features, im, update, rate, momentum, lambda, smooth_size, 1); //if ((n+1)%30 == 0) rate *= .5; show_image(im, "reconstruction"); #ifdef OPENCV cvWaitKey(10); #endif }else{ int layer = max_layer + rand()%range - range/2; int octave = rand()%octaves; optimize_picture(&net, im, layer, 1/pow(1.33333333, octave), rate, thresh, norm); } } fprintf(stderr, "done\n"); if(0){ image g = grayscale_image(im); free_image(im); im = g; } char buff[256]; if (prefix){ sprintf(buff, "%s/%s_%s_%d_%06d",prefix, imbase, cfgbase, max_layer, e); }else{ sprintf(buff, "%s_%s_%d_%06d",imbase, cfgbase, max_layer, e); } printf("%d %s\n", e, buff); save_image(im, buff); //show_image(im, buff); //cvWaitKey(0); if(rotate){ image rot = rotate_image(im, rotate); free_image(im); im = rot; } image crop = crop_image(im, im.w * (1. - zoom)/2., im.h * (1.-zoom)/2., im.w*zoom, im.h*zoom); image resized = resize_image(crop, im.w, im.h); free_image(im); free_image(crop); im = resized; } }
/*
 * Draw a factor from rand_uniform(1, s) and return either the factor
 * itself or its reciprocal, chosen by the parity of rand().
 */
float rand_scale(float s)
{
    float factor = rand_uniform(1, s);
    int keep_as_is = rand()%2;

    if (!keep_as_is) {
        return 1./factor;
    }
    return factor;
}
/*
 * Build a fully connected layer, optionally with batch normalization.
 *
 * batch           - number of samples per batch
 * inputs          - number of inputs per sample
 * outputs         - number of outputs per sample
 * activation      - activation stored on the layer
 * batch_normalize - when non-zero, also allocate the scale / mean /
 *                   variance / rolling statistics and normalization buffers
 *
 * All host buffers are zeroed by calloc; weights are then set to
 * scale * rand_uniform(-1, 1) with scale = sqrt(2 / inputs), biases to 0
 * and batch-norm scales to 1.  When GPU is defined every host buffer is
 * mirrored with cuda_make_array.  A summary line is printed to stderr.
 */
connected_layer make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation, int batch_normalize)
{
    int i;
    connected_layer l;// = {0};
    memset(&l, 0, sizeof(connected_layer));
    l.type = CONNECTED;

    l.inputs = inputs;
    l.outputs = outputs;
    l.batch=batch;
    l.batch_normalize = batch_normalize;
    l.h = 1;
    l.w = 1;
    l.c = inputs;
    l.out_h = 1;
    l.out_w = 1;
    l.out_c = outputs;

    l.output = (float*)calloc(batch*outputs, sizeof(float));
    l.delta = (float*)calloc(batch*outputs, sizeof(float));

    l.weight_updates = (float*)calloc(inputs*outputs, sizeof(float));
    l.bias_updates = (float*)calloc(outputs, sizeof(float));

    l.weights = (float*)calloc(outputs*inputs, sizeof(float));
    l.biases = (float*)calloc(outputs, sizeof(float));

    float scale = sqrt(2./inputs);
    for(i = 0; i < outputs*inputs; ++i){
        l.weights[i] = scale*rand_uniform(-1, 1);
    }

    for(i = 0; i < outputs; ++i){
        l.biases[i] = 0;
    }

    if(batch_normalize){
        l.scales = (float*)calloc(outputs, sizeof(float));
        l.scale_updates = (float*)calloc(outputs, sizeof(float));
        for(i = 0; i < outputs; ++i){
            l.scales[i] = 1;
        }

        l.mean = (float*)calloc(outputs, sizeof(float));
        l.mean_delta = (float*)calloc(outputs, sizeof(float));
        l.variance = (float*)calloc(outputs, sizeof(float));
        l.variance_delta = (float*)calloc(outputs, sizeof(float));

        l.rolling_mean = (float*)calloc(outputs, sizeof(float));
        l.rolling_variance = (float*)calloc(outputs, sizeof(float));

        l.x = (float*)calloc(batch*outputs, sizeof(float));
        l.x_norm = (float*)calloc(batch*outputs, sizeof(float));
    }

#ifdef GPU
    l.weights_gpu = cuda_make_array(l.weights, outputs*inputs);
    l.biases_gpu = cuda_make_array(l.biases, outputs);

    l.weight_updates_gpu = cuda_make_array(l.weight_updates, outputs*inputs);
    l.bias_updates_gpu = cuda_make_array(l.bias_updates, outputs);

    l.output_gpu = cuda_make_array(l.output, outputs*batch);
    l.delta_gpu = cuda_make_array(l.delta, outputs*batch);
    if(batch_normalize){
        l.scales_gpu = cuda_make_array(l.scales, outputs);
        l.scale_updates_gpu = cuda_make_array(l.scale_updates, outputs);

        /* Each device buffer is created from its own host counterpart.  The
           host arrays are all still zero here, so the initial contents are
           the same as when they were seeded from l.mean / l.variance /
           l.output, but the pairing stays correct if an initial value ever
           changes. */
        l.mean_gpu = cuda_make_array(l.mean, outputs);
        l.variance_gpu = cuda_make_array(l.variance, outputs);

        l.rolling_mean_gpu = cuda_make_array(l.rolling_mean, outputs);
        l.rolling_variance_gpu = cuda_make_array(l.rolling_variance, outputs);

        l.mean_delta_gpu = cuda_make_array(l.mean_delta, outputs);
        l.variance_delta_gpu = cuda_make_array(l.variance_delta, outputs);

        l.x_gpu = cuda_make_array(l.x, l.batch*outputs);
        l.x_norm_gpu = cuda_make_array(l.x_norm, l.batch*outputs);
    }
#endif
    l.activation = activation;
    fprintf(stderr, "Connected Layer: %d inputs, %d outputs\n", inputs, outputs);
    return l;
}
int main( int argc, char* argv[] ) { // Dimension of vectors // TODO: When I use 1e2, 1e3, 1e4 I get segfaults. Why? int n = (int) 1e7; int k = 10; int nk = n * k; // Arbitrary element to evaluate float x = 3.14159; // Host input vectors float *h_coef; float *h_controlpts; // Host output vector float *h_out; // Size, in bytes, of each vector size_t bytes = nk*sizeof(float); size_t outbytes = n*sizeof(float); // Allocate memory for each vector on host h_coef = (float*)malloc(bytes); h_controlpts = (float*)malloc(bytes); h_out = (float*)malloc(outbytes); // Initialize vectors on host int i; for( i = 0; i < nk; i++ ) { h_coef[i] = rand_uniform(); h_controlpts[i] = rand_uniform(); } // Adding this to test for correctness on the first row for( i = 0; i < k; i++ ) { h_coef[i] = 1.0; h_controlpts[i] = (float) i; } //------------------------------------------------------------ // OpenCL starts here /* Load kernel source file */ // From // https://www.fixstars.com/en/opencl/book/OpenCLProgrammingBook/calling-the-kernel/ FILE *fp; const char fileName[] = "./tps_kernel.cl"; size_t source_size; char *kernelSource; fp = fopen(fileName, "r"); if (!fp) { fprintf(stderr, "Failed to load kernel.\n"); exit(1); } kernelSource = (char *)malloc(MAX_SOURCE_SIZE); source_size = fread(kernelSource, 1, MAX_SOURCE_SIZE, fp); fclose(fp); // Device input buffers cl_mem d_coef; cl_mem d_controlpts; // Device output buffer cl_mem d_out; cl_platform_id cpPlatform; // OpenCL platform cl_device_id device_id; // device ID cl_context context; // context cl_command_queue queue; // command queue cl_program program; // program cl_kernel kernel; // kernel size_t globalSize, localSize; cl_int err; // Number of work items in each local work group // TODO: What does this do? This may be a bug, go look! 
// Thi localSize = 100; // Number of total work items - localSize must be devisor globalSize = ceil(n/(float)localSize)*localSize; // Bind to platform err = clGetPlatformIDs(1, &cpPlatform, NULL); // Get ID for the device err = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_GPU, 1, &device_id, NULL); // Create a context context = clCreateContext(0, 1, &device_id, NULL, NULL, &err); // Create a command queue queue = clCreateCommandQueue(context, device_id, 0, &err); // Create the compute program from the source buffer program = clCreateProgramWithSource(context, 1, (const char **) & kernelSource, NULL, &err); // Build the program executable clBuildProgram(program, 0, NULL, NULL, NULL, NULL); // Create the compute kernel in the program we wish to run kernel = clCreateKernel(program, "thin_plate_spline", &err); // Create the input and output arrays in device memory for our calculation d_coef = clCreateBuffer(context, CL_MEM_READ_ONLY, bytes, NULL, NULL); d_controlpts = clCreateBuffer(context, CL_MEM_READ_ONLY, bytes, NULL, NULL); d_out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, outbytes, NULL, NULL); clock_t t0 = clock(); // Write our data set into the input array in device memory err = clEnqueueWriteBuffer(queue, d_coef, CL_TRUE, 0, bytes, h_coef, 0, NULL, NULL); err |= clEnqueueWriteBuffer(queue, d_controlpts, CL_TRUE, 0, bytes, h_controlpts, 0, NULL, NULL); // Set the arguments to our compute kernel err = clSetKernelArg(kernel, 0, sizeof(float), &x); err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_coef); err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &d_controlpts); err |= clSetKernelArg(kernel, 3, sizeof(cl_mem), &d_out); err |= clSetKernelArg(kernel, 4, sizeof(int), &k); err |= clSetKernelArg(kernel, 5, sizeof(int), &n); clock_t t1 = clock(); // Execute the kernel over the entire range of the data set err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &globalSize, &localSize, 0, NULL, NULL); // Wait for the command queue to get serviced before reading back 
results clFinish(queue); clock_t t2 = clock(); // Read the results from the device clEnqueueReadBuffer(queue, d_out, CL_TRUE, 0, outbytes, h_out, 0, NULL, NULL ); clock_t t3 = clock(); printf("Dimensions: n = %i, k = %i\n", n, k); printf("\n"); printf("Transfer input to device (GPU): %f sec\n", time_spent(t0, t1)); printf("Run compute kernel: %f sec\n", time_spent(t1, t2)); printf("Return output to host (CPU): %f sec\n", time_spent(t2, t3)); printf("Total: %f sec\n", time_spent(t0, t3)); printf("\n"); printf("Correct?\n"); printf("Expected: 143.6215\n"); printf("Actual: %f\n", h_out[0]); // release OpenCL resources clReleaseMemObject(d_coef); clReleaseMemObject(d_controlpts); clReleaseMemObject(d_out); clReleaseProgram(program); clReleaseKernel(kernel); clReleaseCommandQueue(queue); clReleaseContext(context); //release host memory free(h_coef); free(h_controlpts); //*** glibc detected *** ./tpsCL: free(): invalid pointer: 0x00007fc58431f010 *** // Why? free(h_out); free(kernelSource); return 0; }
/*
 * Scale a draw from rand_uniform() by n and truncate it to an int.
 */
int rand_int ( int n )
{
  double scaled = n * rand_uniform();

  return (int) scaled;
}
/*
 * Read the observations into the temporary file and accumulate the
 * cross-product matrix T.
 *
 * For every observation j in d.l1..d.l2 that is not rejected by
 * unsuitable() and has no missing value among the m variables d.v[],
 * one record is appended to the temporary file and the products
 * xx[i]*xx[h] are added into T.  The observation count is left in n.
 *
 * Returns 1 on success, -1 if the temporary file cannot be opened.
 */
static int lue_havainnot()
{
    int j;
    int i,h;
    double y;
    int miss;
    int group;
    char *p;

    hav=muste_fopen(tempfile,"wb");
    if (hav==NULL)
    {
        sprintf(sbuf,"\nCannot open file %s for temporary data!",tempfile);
        sur_print(sbuf);
        WAIT;
        return(-1);
    }

    sur_print("\nReading observations... ");

    /* clear the m x m accumulator and the observation counter */
    for (i=0; i<m*m; ++i) T[i]=0.0;
    n=0L;

    for (j=d.l1; j<=d.l2; ++j)
    {
        if (unsuitable(&d,j)) continue;

        /* load the m variables; skip the observation if any is missing */
        miss=0;
        for (i=0; i<m; ++i)
        {
            data_load(&d,j,d.v[i],&y);
            if (y==MISSING8) { miss=1; break; }
            xx[i]=y;
        }
        if (miss) continue;

        if (ivar>=0)
        {
            /* group given by variable ivar; values outside 1..ng are
               counted in ivar_init and stored as group 0 */
            data_load(&d,j,ivar,&y);
            if (y==MISSING8) continue;
            group=y;
            if (group<=0 || group>ng) { ++ivar_init; group=0; }
        }
        else group=ng*rand_uniform()+1; /* no group variable: random group */

        /* Record layout, written byte by byte in native representation:
           current group (int), n_saved zero ints, observation number j (int),
           then the m data values (sizeof(double) bytes each). */
        p=(char *)&group;
        for (h=0; h<sizeof(int); ++h) putc((int)p[h],hav);

        group=0;
        p=(char *)&group;
        for (i=0; i<n_saved; ++i)
            for (h=0; h<sizeof(int); ++h) putc((int)p[h],hav);

        p=(char *)&j;
        for (h=0; h<sizeof(int); ++h) putc((int)p[h],hav);

        for (i=0; i<m; ++i)
        {
            p=(char *)&xx[i];
            for (h=0; h<sizeof(double); ++h) putc((int)p[h],hav);
        }

        ++n;

        /* accumulate cross products for index pairs with h <= i */
        for (i=0; i<m; ++i)
            for (h=0; h<=i; ++h) T[i+m*h]+=xx[i]*xx[h];

        /* a key press toggles the progress display */
        if (sur_kbhit()) { i=sur_getch(); prind=1-prind; }
        if (prind) { sprintf(sbuf,"%d ",j); sur_print(sbuf); }
    }

    /* byte sizes of the saved-groups part and of one whole record */
    n_saved_len=n_saved*sizeof(int);
    hav_len=sizeof(int)+n_saved_len+sizeof(int)+m*sizeof(double);
    muste_fclose(hav);

    /* mirror the accumulated half of T so that it is symmetric */
    for (i=0; i<m; ++i)
        for (h=0; h<i; ++h) T[h+m*i]=T[i+m*h];

    /* reset the per-slot bookkeeping for the n_saved saved groupings */
    for (i=0; i<n_saved; ++i) { lambda2[i]=1e100; freq[i]=0; }

    return(1);
}
int Load() { CountdownValue = 3; GameRunning = false; Cam.Create(); iteam::Font.push_back(gp2d::Font()); iteam::Font[0].Load("data/gui/ingame/counter.ttf", 64); iteam::Font.push_back(gp2d::Font()); iteam::Font[1].Load("data/gui/ingame/eras_bold.TTF", 12); iteam::Font.push_back(gp2d::Font()); iteam::Font[2].Load("data/gui/ingame/eras_bold.TTF", 11); Audio.push_back(gp2d::Sound()); Audio.push_back(gp2d::Sound()); Audio[0].LoadWAV("data/sound/characters/jump.wav"); Audio[1].LoadWAV("data/sound/interface/clock5.wav"); Song.push_back(gp2d::Music()); Song[0].Load("data/music/song1.ogg"); Song[0].SetLoop(-1); //Song[0].Play(); init_rand(); cout<<"random seed initialized"<<endl; //add players for(int i=0;i<2;i++) { AddPlayer(IT_PLAYER_SUSI,(int)rand_uniform(50,180),50,IT_PLAYER_FACE_RIGHT,1); //Change player names to make it easier to differentiate them. sprintf(Player[i].Name,"Susi %d",i); } strcpy(Player[0].Team,"Good"); strcpy(Player[1].Team,"Evil"); Tank_base.Load("data/vehicles/tank_base.png", 1, 1); Tank_canon.Load("data/vehicles/tank_canon.png", 1, 1); Level.push_back(gp2d::Sprite()); Level.push_back(gp2d::Sprite()); Level.push_back(gp2d::Sprite()); Level[0].Load("data/levels/grassymt/terrain.png"); Level[1].Load("data/levels/grassymt/layer1.png"); Level[2].Load("data/levels/grassymt/layer2.png"); Level[0].Move(VIEWPORT_WIDTH/2,VIEWPORT_HEIGHT-Level[0].height[0]/2); Level[1].width[0]=2048; Level[1].height[0]=1024; Level[2].ResizePropW(GameResW); InGameGUI.push_back(gp2d::Sprite()); InGameGUI.push_back(gp2d::Sprite()); InGameGUI[0].Load("data/gui/ingame/bottom2.png"); InGameGUI[0].ResizePropW(GameResW); InGameGUI[0].Move(VIEWPORT_WIDTH/2,VIEWPORT_HEIGHT-InGameGUI[0].height[0]/2); InGameGUI[0].alpha[0]=0.9f; InGameGUI[1].Load("data/gui/ingame/countdown_3.png"); InGameGUI[1].Load("data/gui/ingame/countdown_2.png"); InGameGUI[1].Load("data/gui/ingame/countdown_1.png"); InGameGUI[1].Load("data/gui/ingame/countdown_duel.png"); InGameGUI[1].iterateSheets = true; 
InGameGUI[1].setAnimationSpeed(1.0f); InGameGUI[1].animationTimer.Start(); AnglePointer.Load("data/gui/ingame/angle_pointer.png"); AnglePointer.Move(iteam::InGameGUI[0].x + 100, iteam::InGameGUI[0].y + 20); WeaponSelector.Load("data/weapons/weapon_selector.png"); WeaponSelector.Scale(38./32.); WeaponSelector.Move(THUMBS_WIDTH, 600 - THUMBS_HEIGHT); //Test_Init(); Init_Explosions(); Weapons_Init(); }
/**
 * Hill-climbing local search with random restarts.
 *
 * Starts from a chromosome with one rand_uniform() gene per entry of
 * historicalData->openBugList. Each pass tries pairwise gene swaps on a
 * scratch copy; the first swap whose cost is below the best cost so far is
 * accepted and the pass continues from that position. After each pass a
 * fresh random chromosome is generated and kept as the best if it is
 * cheaper. The search stops once the evaluation counter exceeds
 * maxEvaluations.
 *
 * @return the lowest-cost chromosome found. It is heap-allocated and not
 *         released here, so the caller is responsible for deleting it.
 */
Chromosome *LocalSearch::run ()
{
	// Creates a random chromosome
	std::vector <double> genes;

	for (unsigned k = 0; k < historicalData->openBugList.size(); k++)
		genes.push_back (rand_uniform());

	// Calculate cost and chromosome size
	Chromosome *chromosome = new Chromosome (historicalData, genes);
	double costReference = chromosome->getCost();
	int evaluations = 1;

	// Hill Climbing local search
	int size = chromosome->genes.size();
	Chromosome *neighbour = chromosome->clone();

	while (evaluations <= maxEvaluations)
	{
		int i = 0, j;

		while (i < size && evaluations <= maxEvaluations)
		{
			for (j = i + 1; j < size && evaluations <= maxEvaluations; j++)
			{
				neighbour->swapGene(i, j);
				neighbour->recalculateSchedule();
				evaluations++;

				if (neighbour->getCost() < costReference)
				{
					// Accept the improvement and keep exploring from a copy.
					delete chromosome;
					chromosome = neighbour;
					costReference = neighbour->getCost();
					neighbour = neighbour->clone();
					break;
				}
				else
					neighbour->swapGene (j, i);	// undo the rejected swap
			}

			// Resume from where the inner scan stopped.
			i = j;
		}

		// Creates new random chromosome
		genes.clear();

		for (unsigned k = 0; k < historicalData->openBugList.size(); k++)
			genes.push_back (rand_uniform());

		// Calculate cost and chromosome size
		delete neighbour;
		neighbour = new Chromosome (historicalData, genes);
		evaluations++;

		if (neighbour->getCost() < costReference)
		{
			delete chromosome;
			chromosome = neighbour;
			costReference = neighbour->getCost();
			neighbour = neighbour->clone();
		}
	}

	// The scratch candidate is always a distinct object from the returned
	// best chromosome; it was previously leaked on every call.
	delete neighbour;

	return chromosome;
}
/*
 * Build a convolutional layer and return it by value.
 *
 * batch, h, w, c      stored as l.batch, l.h, l.w, l.c
 * n                   number of filters (also the output channel count)
 * size, stride, pad   stored as l.size, l.stride, l.pad
 * activation          stored as l.activation
 * batch_normalize     non-zero: allocate scales (set to 1), scale updates,
 *                     mean/variance and their rolling counterparts
 * binary              non-zero: allocate binary_filters, cfilters, scales
 * xnor                non-zero: allocate binary_filters, binary_input
 *
 * Filters are initialised to scale*rand_uniform(-1, 1) with
 * scale = sqrt(2/(size*size*c)); all other host buffers come from calloc.
 * Allocation results are not checked.
 *
 * Buffers shared by several options (binary_filters, scales and, on the
 * GPU, binary_filters_gpu) are allocated exactly once; allocating them once
 * per option leaked the earlier buffer whenever options were combined.
 */
convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation, int batch_normalize, int binary, int xnor)
{
    int i;
    convolutional_layer l = {0};
    l.type = CONVOLUTIONAL;

    l.h = h;
    l.w = w;
    l.c = c;
    l.n = n;
    l.binary = binary;
    l.xnor = xnor;
    l.batch = batch;
    l.stride = stride;
    l.size = size;
    l.pad = pad;
    l.batch_normalize = batch_normalize;

    l.filters = calloc(c*n*size*size, sizeof(float));
    l.filter_updates = calloc(c*n*size*size, sizeof(float));

    l.biases = calloc(n, sizeof(float));
    l.bias_updates = calloc(n, sizeof(float));

    // float scale = 1./sqrt(size*size*c);
    float scale = sqrt(2./(size*size*c));
    for(i = 0; i < c*n*size*size; ++i) l.filters[i] = scale*rand_uniform(-1, 1);

    int out_h = convolutional_out_height(l);
    int out_w = convolutional_out_width(l);
    l.out_h = out_h;
    l.out_w = out_w;
    l.out_c = n;
    l.outputs = l.out_h * l.out_w * l.out_c;
    l.inputs = l.w * l.h * l.c;

    l.output = calloc(l.batch*out_h * out_w * n, sizeof(float));
    l.delta  = calloc(l.batch*out_h * out_w * n, sizeof(float));

    /* binary_filters is needed by both binary and xnor: allocate it once. */
    if(binary || xnor){
        l.binary_filters = calloc(c*n*size*size, sizeof(float));
    }
    if(binary){
        l.cfilters = calloc(c*n*size*size, sizeof(char));
    }
    if(xnor){
        l.binary_input = calloc(l.inputs*l.batch, sizeof(float));
    }

    /* scales is needed by both binary and batch_normalize: allocate it once. */
    if(binary || batch_normalize){
        l.scales = calloc(n, sizeof(float));
    }
    if(batch_normalize){
        l.scale_updates = calloc(n, sizeof(float));
        for(i = 0; i < n; ++i){
            l.scales[i] = 1;
        }

        l.mean = calloc(n, sizeof(float));
        l.variance = calloc(n, sizeof(float));

        l.rolling_mean = calloc(n, sizeof(float));
        l.rolling_variance = calloc(n, sizeof(float));
    }

#ifdef GPU
    l.filters_gpu = cuda_make_array(l.filters, c*n*size*size);
    l.filter_updates_gpu = cuda_make_array(l.filter_updates, c*n*size*size);

    l.biases_gpu = cuda_make_array(l.biases, n);
    l.bias_updates_gpu = cuda_make_array(l.bias_updates, n);

    l.scales_gpu = cuda_make_array(l.scales, n);
    l.scale_updates_gpu = cuda_make_array(l.scale_updates, n);

    l.delta_gpu = cuda_make_array(l.delta, l.batch*out_h*out_w*n);
    l.output_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);

    /* One device copy serves both options (was created twice, leaking one). */
    if(binary || xnor){
        l.binary_filters_gpu = cuda_make_array(l.filters, c*n*size*size);
    }
    if(xnor){
        l.binary_input_gpu = cuda_make_array(0, l.inputs*l.batch);
    }

    if(batch_normalize){
        l.mean_gpu = cuda_make_array(l.mean, n);
        l.variance_gpu = cuda_make_array(l.variance, n);

        l.rolling_mean_gpu = cuda_make_array(l.mean, n);
        l.rolling_variance_gpu = cuda_make_array(l.variance, n);

        l.mean_delta_gpu = cuda_make_array(l.mean, n);
        l.variance_delta_gpu = cuda_make_array(l.variance, n);

        l.x_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);
        l.x_norm_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);
    }
#ifdef CUDNN
    cudnnCreateTensorDescriptor(&l.srcTensorDesc);
    cudnnCreateTensorDescriptor(&l.dstTensorDesc);
    cudnnCreateFilterDescriptor(&l.filterDesc);
    cudnnCreateTensorDescriptor(&l.dsrcTensorDesc);
    cudnnCreateTensorDescriptor(&l.ddstTensorDesc);
    cudnnCreateFilterDescriptor(&l.dfilterDesc);
    cudnnCreateConvolutionDescriptor(&l.convDesc);
    cudnnSetTensor4dDescriptor(l.dsrcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l.batch, l.c, l.h, l.w);
    cudnnSetTensor4dDescriptor(l.ddstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l.batch, l.out_c, l.out_h, l.out_w);
    cudnnSetFilter4dDescriptor(l.dfilterDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, l.n, l.c, l.size, l.size);

    cudnnSetTensor4dDescriptor(l.srcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l.batch, l.c, l.h, l.w);
    cudnnSetTensor4dDescriptor(l.dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l.batch, l.out_c, l.out_h, l.out_w);
    cudnnSetFilter4dDescriptor(l.filterDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, l.n, l.c, l.size, l.size);
    int padding = l.pad ? l.size/2 : 0;
    cudnnSetConvolution2dDescriptor(l.convDesc, padding, padding, l.stride, l.stride, 1, 1, CUDNN_CROSS_CORRELATION);
    cudnnGetConvolutionForwardAlgorithm(cudnn_handle(), l.srcTensorDesc, l.filterDesc, l.convDesc, l.dstTensorDesc, CUDNN_CONVOLUTION_FWD_PREFER_FASTEST, 0, &l.fw_algo);
    cudnnGetConvolutionBackwardDataAlgorithm(cudnn_handle(), l.filterDesc, l.ddstTensorDesc, l.convDesc, l.dsrcTensorDesc, CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST, 0, &l.bd_algo);
    cudnnGetConvolutionBackwardFilterAlgorithm(cudnn_handle(), l.srcTensorDesc, l.ddstTensorDesc, l.convDesc, l.dfilterDesc, CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST, 0, &l.bf_algo);
#endif
#endif
    l.workspace_size = get_workspace_size(l);
    l.activation = activation;

    fprintf(stderr, "Convolutional Layer: %d x %d x %d image, %d filters -> %d x %d x %d image\n", h,w,c,n, out_h, out_w, n);

    return l;
}
/*
 * Build a convolutional layer and return it by value.
 *
 * batch, h, w, c          stored as l.batch, l.h, l.w, l.c
 * n                       number of filters (also the output channel count)
 * size, stride, padding   stored as l.size, l.stride, l.pad
 * activation              stored as l.activation
 * batch_normalize         non-zero: allocate scales (set to 1), scale
 *                         updates, mean/variance, their deltas and rolling
 *                         counterparts, plus x and x_norm
 * binary                  non-zero: allocate binary_weights, cweights, scales
 * xnor                    non-zero: allocate binary_weights, binary_input
 * adam                    non-zero: set l.adam and allocate m and v
 *
 * Weights are initialised to scale*rand_uniform(-1, 1) with
 * scale = sqrt(2/(size*size*c)); all other host buffers come from calloc.
 * Allocation results are not checked. With GPU defined, device buffers are
 * created only when gpu_index >= 0.
 *
 * Buffers shared by several options (binary_weights, scales and, on the
 * GPU, binary_weights_gpu) are allocated exactly once; allocating them once
 * per option leaked the earlier buffer whenever options were combined.
 */
convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam)
{
    int i;
    convolutional_layer l = {0};
    l.type = CONVOLUTIONAL;

    l.h = h;
    l.w = w;
    l.c = c;
    l.n = n;
    l.binary = binary;
    l.xnor = xnor;
    l.batch = batch;
    l.stride = stride;
    l.size = size;
    l.pad = padding;
    l.batch_normalize = batch_normalize;

    l.weights = calloc(c*n*size*size, sizeof(float));
    l.weight_updates = calloc(c*n*size*size, sizeof(float));

    l.biases = calloc(n, sizeof(float));
    l.bias_updates = calloc(n, sizeof(float));

    // float scale = 1./sqrt(size*size*c);
    float scale = sqrt(2./(size*size*c));
    for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_uniform(-1, 1);

    int out_h = convolutional_out_height(l);
    int out_w = convolutional_out_width(l);
    l.out_h = out_h;
    l.out_w = out_w;
    l.out_c = n;
    l.outputs = l.out_h * l.out_w * l.out_c;
    l.inputs = l.w * l.h * l.c;

    l.output = calloc(l.batch*l.outputs, sizeof(float));
    l.delta  = calloc(l.batch*l.outputs, sizeof(float));

    l.forward = forward_convolutional_layer;
    l.backward = backward_convolutional_layer;
    l.update = update_convolutional_layer;

    /* binary_weights is needed by both binary and xnor: allocate it once. */
    if(binary || xnor){
        l.binary_weights = calloc(c*n*size*size, sizeof(float));
    }
    if(binary){
        l.cweights = calloc(c*n*size*size, sizeof(char));
    }
    if(xnor){
        l.binary_input = calloc(l.inputs*l.batch, sizeof(float));
    }

    /* scales is needed by both binary and batch_normalize: allocate it once. */
    if(binary || batch_normalize){
        l.scales = calloc(n, sizeof(float));
    }
    if(batch_normalize){
        l.scale_updates = calloc(n, sizeof(float));
        for(i = 0; i < n; ++i){
            l.scales[i] = 1;
        }

        l.mean = calloc(n, sizeof(float));
        l.variance = calloc(n, sizeof(float));

        l.mean_delta = calloc(n, sizeof(float));
        l.variance_delta = calloc(n, sizeof(float));

        l.rolling_mean = calloc(n, sizeof(float));
        l.rolling_variance = calloc(n, sizeof(float));
        l.x = calloc(l.batch*l.outputs, sizeof(float));
        l.x_norm = calloc(l.batch*l.outputs, sizeof(float));
    }
    if(adam){
        l.adam = 1;
        l.m = calloc(c*n*size*size, sizeof(float));
        l.v = calloc(c*n*size*size, sizeof(float));
    }

#ifdef GPU
    l.forward_gpu = forward_convolutional_layer_gpu;
    l.backward_gpu = backward_convolutional_layer_gpu;
    l.update_gpu = update_convolutional_layer_gpu;

    if(gpu_index >= 0){
        if (adam) {
            l.m_gpu = cuda_make_array(l.m, c*n*size*size);
            l.v_gpu = cuda_make_array(l.v, c*n*size*size);
        }

        l.weights_gpu = cuda_make_array(l.weights, c*n*size*size);
        l.weight_updates_gpu = cuda_make_array(l.weight_updates, c*n*size*size);

        l.biases_gpu = cuda_make_array(l.biases, n);
        l.bias_updates_gpu = cuda_make_array(l.bias_updates, n);

        l.delta_gpu = cuda_make_array(l.delta, l.batch*out_h*out_w*n);
        l.output_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);

        /* One device copy serves both options (was created twice, leaking one). */
        if(binary || xnor){
            l.binary_weights_gpu = cuda_make_array(l.weights, c*n*size*size);
        }
        if(xnor){
            l.binary_input_gpu = cuda_make_array(0, l.inputs*l.batch);
        }

        if(batch_normalize){
            l.mean_gpu = cuda_make_array(l.mean, n);
            l.variance_gpu = cuda_make_array(l.variance, n);

            l.rolling_mean_gpu = cuda_make_array(l.mean, n);
            l.rolling_variance_gpu = cuda_make_array(l.variance, n);

            l.mean_delta_gpu = cuda_make_array(l.mean, n);
            l.variance_delta_gpu = cuda_make_array(l.variance, n);

            l.scales_gpu = cuda_make_array(l.scales, n);
            l.scale_updates_gpu = cuda_make_array(l.scale_updates, n);

            l.x_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);
            l.x_norm_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);
        }
#ifdef CUDNN
        cudnnCreateTensorDescriptor(&l.srcTensorDesc);
        cudnnCreateTensorDescriptor(&l.dstTensorDesc);
        cudnnCreateFilterDescriptor(&l.weightDesc);
        cudnnCreateTensorDescriptor(&l.dsrcTensorDesc);
        cudnnCreateTensorDescriptor(&l.ddstTensorDesc);
        cudnnCreateFilterDescriptor(&l.dweightDesc);
        cudnnCreateConvolutionDescriptor(&l.convDesc);
        cudnn_convolutional_setup(&l);
#endif
    }
#endif
    l.workspace_size = get_workspace_size(l);
    l.activation = activation;

    fprintf(stderr, "conv %5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c);

    return l;
}