Exemple #1
0
/*
 * CPU forward pass of batch normalization for layer `l`.
 *
 * A BATCHNORM layer first copies state.input into l.output; for any other
 * layer type l.output is used as it is. A CONNECTED layer is treated as
 * l.outputs channels of spatial size 1x1 (`l` is passed by value, so this
 * reshaping is local to the call).
 *
 * When state.train is set, the batch mean/variance are computed, the rolling
 * statistics are updated with factors .99/.01, and l.x / l.x_norm receive the
 * activations before and after normalization. Otherwise normalization uses
 * the rolling statistics. scale_bias and add_bias are applied last.
 */
void forward_batchnorm_layer(layer l, network_state state)
{
    const int total = l.outputs*l.batch;

    if(l.type == BATCHNORM){
        copy_cpu(total, state.input, 1, l.output, 1);
    }
    if(l.type == CONNECTED){
        l.out_c = l.outputs;
        l.out_w = 1;
        l.out_h = 1;
    }

    const int channels = l.out_c;
    const int spatial = l.out_h*l.out_w;

    if(!state.train){
        /* Inference: normalize with the accumulated statistics. */
        normalize_cpu(l.output, l.rolling_mean, l.rolling_variance, l.batch, channels, spatial);
    } else {
        /* Statistics of the current batch. */
        mean_cpu(l.output, l.batch, channels, spatial, l.mean);
        variance_cpu(l.output, l.mean, l.batch, channels, spatial, l.variance);

        /* rolling = .99*rolling + .01*batch (assuming BLAS-style scal/axpy). */
        scal_cpu(channels, .99, l.rolling_mean, 1);
        axpy_cpu(channels, .01, l.mean, 1, l.rolling_mean, 1);
        scal_cpu(channels, .99, l.rolling_variance, 1);
        axpy_cpu(channels, .01, l.variance, 1, l.rolling_variance, 1);

        /* Keep the raw and the normalized activations. */
        copy_cpu(total, l.output, 1, l.x, 1);
        normalize_cpu(l.output, l.mean, l.variance, l.batch, channels, spatial);
        copy_cpu(total, l.output, 1, l.x_norm, 1);
    }

    scale_bias(l.output, l.scales, l.batch, channels, spatial);
    add_bias(l.output, l.biases, l.batch, channels, spatial);
}
Exemple #2
0
void oneoff(char *cfgfile, char *weightfile, char *outfile)
{
    gpu_index = -1;
    network *net = parse_network_cfg(cfgfile);
    int oldn = net->layers[net->n - 2].n;
    int c = net->layers[net->n - 2].c;
    scal_cpu(oldn*c, .1, net->layers[net->n - 2].weights, 1);
    scal_cpu(oldn, 0, net->layers[net->n - 2].biases, 1);
    net->layers[net->n - 2].n = 11921;
    net->layers[net->n - 2].biases += 5;
    net->layers[net->n - 2].weights += 5*c;
    if(weightfile){
        load_weights(net, weightfile);
    }
    net->layers[net->n - 2].biases -= 5;
    net->layers[net->n - 2].weights -= 5*c;
    net->layers[net->n - 2].n = oldn;
    printf("%d\n", oldn);
    layer l = net->layers[net->n - 2];
    copy_cpu(l.n/3, l.biases, 1, l.biases +   l.n/3, 1);
    copy_cpu(l.n/3, l.biases, 1, l.biases + 2*l.n/3, 1);
    copy_cpu(l.n/3*l.c, l.weights, 1, l.weights +   l.n/3*l.c, 1);
    copy_cpu(l.n/3*l.c, l.weights, 1, l.weights + 2*l.n/3*l.c, 1);
    *net->seen = 0;
    save_weights(net, outfile);
    free_network(net);
}
Exemple #3
0
/*
 * CPU forward pass of a fully connected layer.
 *
 * l.output is cleared, then gemm accumulates state.input (batch x inputs)
 * times the transposed weight matrix into it. With batch normalization
 * enabled the output is normalized (batch statistics when training, rolling
 * statistics otherwise; rolling factors .95/.05) and scaled. Biases are then
 * added to every batch row and the activation is applied in place.
 */
void forward_connected_layer(connected_layer l, network_state state)
{
    const int rows = l.batch;
    const int inner = l.inputs;
    const int cols = l.outputs;
    const int total = cols*rows;

    fill_cpu(total, 0, l.output, 1);
    gemm(0,1,rows,cols,inner,1,state.input,inner,l.weights,inner,1,l.output,cols);

    if(l.batch_normalize){
        if(!state.train){
            normalize_cpu(l.output, l.rolling_mean, l.rolling_variance, rows, cols, 1);
        } else {
            mean_cpu(l.output, rows, cols, 1, l.mean);
            variance_cpu(l.output, l.mean, rows, cols, 1, l.variance);

            scal_cpu(cols, .95, l.rolling_mean, 1);
            axpy_cpu(cols, .05, l.mean, 1, l.rolling_mean, 1);
            scal_cpu(cols, .95, l.rolling_variance, 1);
            axpy_cpu(cols, .05, l.variance, 1, l.rolling_variance, 1);

            /* Keep the raw and the normalized activations. */
            copy_cpu(total, l.output, 1, l.x, 1);
            normalize_cpu(l.output, l.mean, l.variance, rows, cols, 1);
            copy_cpu(total, l.output, 1, l.x_norm, 1);
        }
        scale_bias(l.output, l.scales, rows, cols, 1);
    }

    /* Add the bias vector to each row of the output. */
    float *row = l.output;
    int b;
    for(b = 0; b < rows; ++b, row += cols){
        axpy_cpu(cols, 1, l.biases, 1, row, 1);
    }
    activate_array(l.output, total, l.activation);
}
Exemple #4
0
/*
 * Applies the accumulated updates of a connected layer.
 *
 * Biases and weights receive their update buffers scaled by
 * learning_rate/batch; the weight updates first receive -decay*batch times
 * the current weights. Both update buffers are then scaled by `momentum`.
 */
void update_connected_layer(connected_layer l, int batch, float learning_rate, float momentum, float decay)
{
    const int nweights = l.inputs*l.outputs;
    const float step = learning_rate/batch;
    const float shrink = -decay*batch;

    /* Biases. */
    axpy_cpu(l.outputs, step, l.bias_updates, 1, l.biases, 1);
    scal_cpu(l.outputs, momentum, l.bias_updates, 1);

    /* Weights: decay term first, then the step, then momentum. */
    axpy_cpu(nweights, shrink, l.weights, 1, l.weight_updates, 1);
    axpy_cpu(nweights, step, l.weight_updates, 1, l.weights, 1);
    scal_cpu(nweights, momentum, l.weight_updates, 1);
}
/*
 * Applies the accumulated updates of a convolutional layer (filters/biases).
 *
 * Biases and filters receive their update buffers scaled by
 * learning_rate/batch; the filter updates first receive -decay*batch times
 * the current filters. Both update buffers are then scaled by `momentum`.
 */
void update_convolutional_layer(convolutional_layer l, int batch, float learning_rate, float momentum, float decay)
{
    const int nfilter_weights = l.size*l.size*l.c*l.n;
    const float step = learning_rate/batch;
    const float shrink = -decay*batch;

    /* Biases. */
    axpy_cpu(l.n, step, l.bias_updates, 1, l.biases, 1);
    scal_cpu(l.n, momentum, l.bias_updates, 1);

    /* Filters: decay term first, then the step, then momentum. */
    axpy_cpu(nfilter_weights, shrink, l.filters, 1, l.filter_updates, 1);
    axpy_cpu(nfilter_weights, step, l.filter_updates, 1, l.filters, 1);
    scal_cpu(nfilter_weights, momentum, l.filter_updates, 1);
}
/*
 * Applies the accumulated updates of a locally connected layer.
 *
 * The weight count is size*size*c*n for each of the out_w*out_h output
 * locations. Biases (l.outputs of them) and weights receive their update
 * buffers scaled by learning_rate/batch; the weight updates first receive
 * -decay*batch times the current weights. Both update buffers are then
 * scaled by `momentum`.
 */
void update_local_layer(local_layer l, int batch, float learning_rate,
		float momentum, float decay) {
	const int nweights = l.size * l.size * l.c * l.n * (l.out_w * l.out_h);
	const float step = learning_rate / batch;
	const float shrink = -decay * batch;

	/* Biases. */
	axpy_cpu(l.outputs, step, l.bias_updates, 1, l.biases, 1);
	scal_cpu(l.outputs, momentum, l.bias_updates, 1);

	/* Weights: decay term first, then the step, then momentum. */
	axpy_cpu(nweights, shrink, l.weights, 1, l.weight_updates, 1);
	axpy_cpu(nweights, step, l.weight_updates, 1, l.weights, 1);
	scal_cpu(nweights, momentum, l.weight_updates, 1);
}
/*
 * Multiplies the parameters of a layer by `s` in place.
 *
 * CONNECTED layers: biases and weights. CONVOLUTIONAL layers: biases,
 * weights and, when present, scales. Any other layer type is left untouched.
 */
void scale_weights(layer l, float s)
{
    if(l.type == CONNECTED){
        scal_cpu(l.outputs, s, l.biases, 1);
        scal_cpu(l.outputs*l.inputs, s, l.weights, 1);
        return;
    }
    if(l.type != CONVOLUTIONAL){
        return;
    }

    scal_cpu(l.n, s, l.biases, 1);
    scal_cpu(l.n*l.size*l.size*l.c, s, l.weights, 1);
    if(l.scales){
        scal_cpu(l.n, s, l.scales, 1);
    }
}
/*
 * Applies the accumulated updates of a deconvolutional layer.
 *
 * Biases, scales (only when l.scales is set) and weights receive their
 * update buffers scaled by learning_rate/batch; the weight updates first
 * receive -decay*batch times the current weights. Every update buffer is
 * then scaled by `momentum`.
 */
void update_deconvolutional_layer(layer l, int batch, float learning_rate, float momentum, float decay)
{
    const int nweights = l.size*l.size*l.c*l.n;
    const float step = learning_rate/batch;
    const float shrink = -decay*batch;

    /* Biases. */
    axpy_cpu(l.n, step, l.bias_updates, 1, l.biases, 1);
    scal_cpu(l.n, momentum, l.bias_updates, 1);

    /* Batch-norm scales, if the layer has them. */
    if(l.scales){
        axpy_cpu(l.n, step, l.scale_updates, 1, l.scales, 1);
        scal_cpu(l.n, momentum, l.scale_updates, 1);
    }

    /* Weights: decay term first, then the step, then momentum. */
    axpy_cpu(nweights, shrink, l.weights, 1, l.weight_updates, 1);
    axpy_cpu(nweights, step, l.weight_updates, 1, l.weights, 1);
    scal_cpu(nweights, momentum, l.weight_updates, 1);
}
Exemple #9
0
/*
 * Fills `move` (19*19 floats) with the network's prediction for `board`.
 *
 * With `multi` set, the prediction is additionally run on 7 transformed
 * views of the board (clockwise rotations by i, flipped as well for i >= 4).
 * Each view's output is transformed back, summed into `move`, and the sum is
 * scaled by 1/8. The board is restored after every view. Finally every entry
 * of `move` whose board cell is non-zero is set to 0.
 */
void predict_move(network net, float *board, float *move, int multi)
{
    const int cells = 19*19;
    int i;

    /* Copy right away: later predictions may reuse the output buffer. */
    float *base = network_predict(net, board);
    copy_cpu(cells, base, 1, move, 1);

    if(multi){
        image bim = float_to_image(19, 19, 1, board);
        for(i = 1; i < 8; ++i){
            const int mirrored = (i >= 4);

            /* Transform the board in place. */
            rotate_image_cw(bim, i);
            if(mirrored) flip_image(bim);

            float *view = network_predict(net, board);
            image oim = float_to_image(19, 19, 1, view);

            /* Undo the transform on the prediction before accumulating. */
            if(mirrored) flip_image(oim);
            rotate_image_cw(oim, -i);

            axpy_cpu(cells, 1, view, 1, move, 1);

            /* Put the board back the way it was. */
            if(mirrored) flip_image(bim);
            rotate_image_cw(bim, -i);
        }
        scal_cpu(cells, 1./8., move, 1);
    }

    for(i = 0; i < cells; ++i){
        if(board[i]) move[i] = 0;
    }
}
Exemple #10
0
/*
 * Averages the parameters of several weight files for one network config.
 *
 * Expected arguments: argv[2] = cfg file, argv[3] = output weight file,
 * argv[4] = first weight file, argv[5..] = further weight files.
 * Biases and filters of CONVOLUTIONAL layers and biases and weights of
 * CONNECTED layers are summed over all weight files, divided by the number
 * of files, and the result is saved to the output file.
 *
 * If fewer than 5 arguments are given, a message is printed to stderr and
 * nothing is done (previously argv was read out of range).
 */
static void average(int argc, char *argv[])
{
    if(argc < 5){
        fprintf(stderr, "average: expected arguments [cfg] [outfile] [weights] [more weights ...]\n");
        return;
    }

    char *cfgfile = argv[2];
    char *outfile = argv[3];
    gpu_index = -1;
    network net = parse_network_cfg(cfgfile);
    network sum = parse_network_cfg(cfgfile);

    /* `sum` starts out as the first weight file. */
    char *weightfile = argv[4];
    load_weights(&sum, weightfile);

    int i, j;
    int n = argc - 5;
    for(i = 0; i < n; ++i){
        /* Load each further weight file into `net` and add it onto `sum`. */
        weightfile = argv[i+5];
        load_weights(&net, weightfile);
        for(j = 0; j < net.n; ++j){
            layer_t l = net.layers[j];
            layer_t out = sum.layers[j];
            if(l.type == CONVOLUTIONAL){
                int num = l.n*l.c*l.size*l.size;
                fltadd(out.biases, l.biases, l.n);
                fltadd(out.filters, l.filters, num);
            }
            if(l.type == CONNECTED){
                fltadd(out.biases, l.biases, l.outputs);
                fltadd(out.weights, l.weights, l.outputs * l.inputs);
            }
        }
    }

    /* Total number of weight files, including the first one. */
    n = n+1;
    for(j = 0; j < net.n; ++j){
        layer_t l = sum.layers[j];
        if(l.type == CONVOLUTIONAL){
            int num = l.n*l.c*l.size*l.size;
            scal_cpu(l.n, 1./n, l.biases, 1);
            scal_cpu(num, 1./n, l.filters, 1);
        }
        if(l.type == CONNECTED){
            scal_cpu(l.outputs, 1./n, l.biases, 1);
            scal_cpu(l.outputs*l.inputs, 1./n, l.weights, 1);
        }
    }
    save_weights(sum, outfile);
}
Exemple #11
0
/*
 * Benchmark driver comparing two code paths on N x N matrices.
 *
 * Reads N from stdin, allocates the matrices and initialises A and B via
 * initial(). The first timed section calls cublas_gemm inside an OpenACC
 * data region and then dot_cpu/axpy_cpu/norm_cpu/copy_cpu/scal_cpu on the
 * host; the second timed section calls gpu_cublas1 on device-resident data
 * and copies D_gpu back. The value of error(D_cpu, D_gpu, N2) and both
 * timings are printed.
 *
 * Returns 0 on success, 1 on invalid input or allocation failure.
 */
int main()
{
	/* Largest N for which N*N still fits in a 32-bit int. */
	enum { MAX_N = 46340 };

	int N, N2;
	printf(" \n Input matrix size N x N, N = ");
	/* Reject unparsable, non-positive or overflowing sizes before using N. */
	if (scanf("%d", &N) != 1 || N < 1 || N > MAX_N) {
		fprintf(stderr, " invalid matrix size: expected an integer in [1, %d] \n", (int) MAX_N);
		return 1;
	}
	printf(" N = %d \n \n", N);
	N2 = N*N;

	double *A, *B, *C_cpu, *C_gpu, *D_cpu, *D_gpu, t1, t2, cpu_time, gpu_time;
	double r_cpu, *r_gpu, nrmC_cpu, *nrmC_gpu;

	A = (double *) malloc(N2*sizeof(double));
	B = (double *) malloc(N2*sizeof(double));
	C_cpu = (double *) malloc(N2*sizeof(double));
	C_gpu = (double *) malloc(N2*sizeof(double));
	D_cpu = (double *) malloc(N2*sizeof(double));
	D_gpu = (double *) malloc(N2*sizeof(double));

	r_gpu = (double *) malloc(1*sizeof(double));
	nrmC_gpu = (double *) malloc(1*sizeof(double));

	if (!A || !B || !C_cpu || !C_gpu || !D_cpu || !D_gpu || !r_gpu || !nrmC_gpu) {
		fprintf(stderr, " out of memory for N = %d \n", N);
		/* free(NULL) is a no-op, so every pointer can be released. */
		free(A);
		free(B);
		free(C_cpu);
		free(C_gpu);
		free(D_cpu);
		free(D_gpu);
		free(r_gpu);
		free(nrmC_gpu);
		return 1;
	}

	initial(A, B, N);

	/* ---- first timed section ---- */
	t1 = clock();

	#pragma acc data copyin(A[0:N2], B[0:N2]) copyout(C_cpu[0:N2])
	{
		cublas_gemm(A, B, C_cpu, N);
	}
	r_cpu = dot_cpu(C_cpu, B, N2);
	axpy_cpu(-1.0*r_cpu, B, C_cpu, N2);
	nrmC_cpu = norm_cpu(C_cpu, N2);
	copy_cpu(C_cpu, D_cpu, N2);
	scal_cpu(1.0/nrmC_cpu, D_cpu, N2);

	t2 = clock();
	cpu_time = 1.0*(t2 - t1)/CLOCKS_PER_SEC;

	/* ---- second timed section ---- */
	t1 = clock();

	#pragma acc enter data copyin(A[0:N2], B[0:N2]) create(C_gpu[0:N2], r_gpu[0], nrmC_gpu[0], D_gpu[0:N2])
	{
		gpu_cublas1(A, B, C_gpu, D_gpu, r_gpu, nrmC_gpu, N, N2);
	}
	#pragma acc update host(D_gpu[0:N2])

	t2 = clock();
	gpu_time = 1.0*(t2 - t1)/CLOCKS_PER_SEC;
	printf(" gpu part success \n");


	printf(" \n error = %f \n", error(D_cpu, D_gpu, N2));
	printf(" gpu time = %f, cpu times = %f \n", gpu_time, cpu_time);

	/* Release the device copies created by "enter data" before freeing the host buffers. */
	#pragma acc exit data delete(A[0:N2], B[0:N2], C_gpu[0:N2], r_gpu[0], nrmC_gpu[0], D_gpu[0:N2])

	free(A);
	free(B);
	free(C_cpu);
	free(C_gpu);
	free(D_cpu);
	free(D_gpu);
	free(r_gpu);
	free(nrmC_gpu);

	return 0;
}
/*
 * Applies the accumulated updates of a deconvolutional layer using the
 * hyper-parameters in `a`.
 *
 * The effective learning rate is a.learning_rate * l.learning_rate_scale.
 * Biases, scales (only when l.scales is set) and weights receive their
 * update buffers scaled by rate/batch; the weight updates first receive
 * -decay*batch times the current weights. Every update buffer is then
 * scaled by the momentum.
 */
void update_deconvolutional_layer(layer l, update_args a) {
	const int batch = a.batch;
	const real_t rate = a.learning_rate * l.learning_rate_scale;
	const real_t mom = a.momentum;
	const real_t wd = a.decay;
	const int nweights = l.size * l.size * l.c * l.n;

	/* Biases. */
	axpy_cpu(l.n, rate / batch, l.bias_updates, 1, l.biases, 1);
	scal_cpu(l.n, mom, l.bias_updates, 1);

	/* Batch-norm scales, if the layer has them. */
	if (l.scales) {
		axpy_cpu(l.n, rate / batch, l.scale_updates, 1, l.scales, 1);
		scal_cpu(l.n, mom, l.scale_updates, 1);
	}

	/* Weights: decay term first, then the step, then momentum. */
	axpy_cpu(nweights, -wd * batch, l.weights, 1, l.weight_updates, 1);
	axpy_cpu(nweights, rate / batch, l.weight_updates, 1, l.weights, 1);
	scal_cpu(nweights, mom, l.weight_updates, 1);
}
/*
 * Applies the accumulated updates of a convolutional layer using the
 * hyper-parameters in `a`.
 *
 * The effective learning rate is a.learning_rate*l.learning_rate_scale.
 * Biases, scales (only when l.scales is set) and the l.nweights weights
 * receive their update buffers scaled by rate/batch; the weight updates
 * first receive -decay*batch times the current weights. Every update buffer
 * is then scaled by the momentum.
 */
void update_convolutional_layer(convolutional_layer l, update_args a)
{
    const int batch = a.batch;
    const float rate = a.learning_rate*l.learning_rate_scale;
    const float mom = a.momentum;
    const float wd = a.decay;

    /* Biases. */
    axpy_cpu(l.n, rate/batch, l.bias_updates, 1, l.biases, 1);
    scal_cpu(l.n, mom, l.bias_updates, 1);

    /* Batch-norm scales, if the layer has them. */
    if(l.scales){
        axpy_cpu(l.n, rate/batch, l.scale_updates, 1, l.scales, 1);
        scal_cpu(l.n, mom, l.scale_updates, 1);
    }

    /* Weights: decay term first, then the step, then momentum. */
    axpy_cpu(l.nweights, -wd*batch, l.weights, 1, l.weight_updates, 1);
    axpy_cpu(l.nweights, rate/batch, l.weight_updates, 1, l.weights, 1);
    scal_cpu(l.nweights, mom, l.weight_updates, 1);
}
Exemple #14
0
void average(int argc, char *argv[])
{
    char *cfgfile = argv[2];
    char *outfile = argv[3];
    gpu_index = -1;
    network *net = parse_network_cfg(cfgfile);
    network *sum = parse_network_cfg(cfgfile);

    char *weightfile = argv[4];
    load_weights(sum, weightfile);

    int i, j;
    int n = argc - 5;
    for(i = 0; i < n; ++i){
        weightfile = argv[i+5];
        load_weights(net, weightfile);
        for(j = 0; j < net->n; ++j){
            layer l = net->layers[j];
            layer out = sum->layers[j];
            if(l.type == CONVOLUTIONAL){
                int num = l.n*l.c*l.size*l.size;
                axpy_cpu(l.n, 1, l.biases, 1, out.biases, 1);
                axpy_cpu(num, 1, l.weights, 1, out.weights, 1);
                if(l.batch_normalize){
                    axpy_cpu(l.n, 1, l.scales, 1, out.scales, 1);
                    axpy_cpu(l.n, 1, l.rolling_mean, 1, out.rolling_mean, 1);
                    axpy_cpu(l.n, 1, l.rolling_variance, 1, out.rolling_variance, 1);
                }
            }
            if(l.type == CONNECTED){
                axpy_cpu(l.outputs, 1, l.biases, 1, out.biases, 1);
                axpy_cpu(l.outputs*l.inputs, 1, l.weights, 1, out.weights, 1);
            }
        }
    }
    n = n+1;
    for(j = 0; j < net->n; ++j){
        layer l = sum->layers[j];
        if(l.type == CONVOLUTIONAL){
            int num = l.n*l.c*l.size*l.size;
            scal_cpu(l.n, 1./n, l.biases, 1);
            scal_cpu(num, 1./n, l.weights, 1);
                if(l.batch_normalize){
                    scal_cpu(l.n, 1./n, l.scales, 1);
                    scal_cpu(l.n, 1./n, l.rolling_mean, 1);
                    scal_cpu(l.n, 1./n, l.rolling_variance, 1);
                }
        }
        if(l.type == CONNECTED){
            scal_cpu(l.outputs, 1./n, l.biases, 1);
            scal_cpu(l.outputs*l.inputs, 1./n, l.weights, 1);
        }
    }
    save_weights(sum, outfile);
}
Exemple #15
0
/*
 * Runs the CPU forward pass over every layer of `net`, in order.
 *
 * For each layer: state.index is set to the layer index, the layer's delta
 * buffer (if any) is scaled by 0, the forward routine matching l.type is
 * called, and the layer's output becomes state.input for the next layer.
 * Layer types without a case here are skipped, but their output is still
 * forwarded as the next input.
 */
void forward_network(network net, network_state state)
{
    int i;
    state.workspace = net.workspace;
    for(i = 0; i < net.n; ++i){
        layer l = net.layers[i];
        state.index = i;
        if(l.delta){
            scal_cpu(l.outputs * l.batch, 0, l.delta, 1);
        }
        switch(l.type){
            case CONVOLUTIONAL:   forward_convolutional_layer(l, state);   break;
            case DECONVOLUTIONAL: forward_deconvolutional_layer(l, state); break;
            case ACTIVE:          forward_activation_layer(l, state);      break;
            case LOCAL:           forward_local_layer(l, state);           break;
            case NORMALIZATION:   forward_normalization_layer(l, state);   break;
            case BATCHNORM:       forward_batchnorm_layer(l, state);       break;
            case DETECTION:       forward_detection_layer(l, state);       break;
            case CONNECTED:       forward_connected_layer(l, state);       break;
            case RNN:             forward_rnn_layer(l, state);             break;
            case GRU:             forward_gru_layer(l, state);             break;
            case CRNN:            forward_crnn_layer(l, state);            break;
            case CROP:            forward_crop_layer(l, state);            break;
            case COST:            forward_cost_layer(l, state);            break;
            case SOFTMAX:         forward_softmax_layer(l, state);         break;
            case MAXPOOL:         forward_maxpool_layer(l, state);         break;
            case AVGPOOL:         forward_avgpool_layer(l, state);         break;
            case DROPOUT:         forward_dropout_layer(l, state);         break;
            /* The route layer takes the whole network, not the state. */
            case ROUTE:           forward_route_layer(l, net);             break;
            case SHORTCUT:        forward_shortcut_layer(l, state);        break;
            default:              break;
        }
        state.input = l.output;
    }
}
/*
 * Runs the CPU forward pass over every layer of `net`, in order, through
 * each layer's own `forward` function pointer.
 *
 * Before a layer runs, state.index is set to its index and its delta buffer
 * (if any) is scaled by 0; afterwards its output becomes state.input for the
 * next layer.
 */
void forward_network(network net, network_state state)
{
    int idx = 0;
    state.workspace = net.workspace;
    while(idx < net.n){
        layer current = net.layers[idx];
        state.index = idx;
        if(current.delta){
            scal_cpu(current.outputs * current.batch, 0, current.delta, 1);
        }
        current.forward(current, state);
        state.input = current.output;
        ++idx;
    }
}
Exemple #17
0
/*
 * Runs `iters` update steps on the image `recon` against the target
 * `features`.
 *
 * Each step allocates a delta image, runs a forward and backward pass with
 * recon as input and features as truth (through device copies when built
 * with GPU), adds the delta into `update`, calls smooth(), adds rate*update
 * to recon (as far as the fltaddmul argument order suggests), scales update
 * by `momentum`, and constrains recon.
 */
void reconstruct_picture(network net, float *features, image recon, image update, float rate, float momentum, float lambda, int smooth_size, int iters)
{
    int step;
    for (step = 0; step < iters; ++step) {
        image grad = make_image(recon.w, recon.h, recon.c);

        NETWORK_STATE(state);
#ifdef GPU
        /* Stage input, gradient and target on the device for this step. */
        state.input = cuda_make_array(recon.data, recon.w*recon.h*recon.c);
        state.delta = cuda_make_array(grad.data, grad.w*grad.h*grad.c);
        state.truth = cuda_make_array(features, get_network_output_size(net));

        forward_network_gpu(net, state);
        backward_network_gpu(net, state);

        /* Bring the input gradient back to the host. */
        cuda_pull_array(state.delta, grad.data, grad.w*grad.h*grad.c);

        cuda_free(state.input);
        cuda_free(state.delta);
        cuda_free(state.truth);
#else
        state.input = recon.data;
        state.delta = grad.data;
        state.truth = features;

        forward_network(net, state);
        backward_network(net, state);
#endif

        /* Accumulate the gradient, regularize, then step with momentum. */
        fltadd(update.data, grad.data, recon.w * recon.h * recon.c);
        smooth(recon, update, lambda, smooth_size);

        fltaddmul(recon.data, update.data, recon.w * recon.h * recon.c, rate);
        scal_cpu(recon.w*recon.h*recon.c, momentum, update.data, 1);

        constrain_image(recon);
        free_image(grad);
    }
}
Exemple #18
0
/*
 * Performs one update step on the image `recon` against the target
 * `features`.
 *
 * recon is scaled by 2 and translated by -1 for the duration of the step and
 * mapped back (translate +1, scale .5) at the end. In between, a forward and
 * backward pass is run with recon as input and features as truth (through
 * device copies when built with GPU); the resulting delta is added into
 * `update`, smooth() is called, rate*update is added to recon, and update is
 * scaled by `momentum`. recon is constrained before returning.
 */
void reconstruct_picture(network net, float *features, image recon, image update, float rate, float momentum, float lambda, int smooth_size)
{
    const int npix = recon.w*recon.h*recon.c;

    scale_image(recon, 2);
    translate_image(recon, -1);

    image grad = make_image(recon.w, recon.h, recon.c);

    network_state state = {0};
#ifdef GPU
    /* Stage input, gradient and target on the device. */
    state.input = cuda_make_array(recon.data, npix);
    state.delta = cuda_make_array(grad.data, grad.w*grad.h*grad.c);
    state.truth = cuda_make_array(features, get_network_output_size(net));

    forward_network_gpu(net, state);
    backward_network_gpu(net, state);

    /* Bring the input gradient back to the host. */
    cuda_pull_array(state.delta, grad.data, grad.w*grad.h*grad.c);

    cuda_free(state.input);
    cuda_free(state.delta);
    cuda_free(state.truth);
#else
    state.input = recon.data;
    state.delta = grad.data;
    state.truth = features;

    forward_network(net, state);
    backward_network(net, state);
#endif

    /* Accumulate the gradient, regularize, then step with momentum. */
    axpy_cpu(npix, 1, grad.data, 1, update.data, 1);
    smooth(recon, update, lambda, smooth_size);

    axpy_cpu(npix, rate, update.data, 1, recon.data, 1);
    scal_cpu(npix, momentum, update.data, 1);

    /* Undo the scale/translate applied at the top. */
    translate_image(recon, 1);
    scale_image(recon, .5);
    constrain_image(recon);
    free_image(grad);
}
Exemple #19
0
/*
 * Runs the CPU forward pass over every layer of `net`, in order.
 *
 * For each layer the delta buffer (if any) is scaled by 0, the forward
 * routine matching l.type is called, and the layer's output becomes
 * state.input for the next layer. Layer types without a case here are
 * skipped, but their output is still forwarded as the next input.
 */
void forward_network(network net, network_state state)
{
    int i;
    for(i = 0; i < net.n; ++i){
        layer l = net.layers[i];
        if(l.delta){
            scal_cpu(l.outputs * l.batch, 0, l.delta, 1);
        }
        switch(l.type){
            case CONVOLUTIONAL:   forward_convolutional_layer(l, state);   break;
            case DECONVOLUTIONAL: forward_deconvolutional_layer(l, state); break;
            case NORMALIZATION:   forward_normalization_layer(l, state);   break;
            case DETECTION:       forward_detection_layer(l, state);       break;
            case CONNECTED:       forward_connected_layer(l, state);       break;
            case CROP:            forward_crop_layer(l, state);            break;
            case COST:            forward_cost_layer(l, state);            break;
            case SOFTMAX:         forward_softmax_layer(l, state);         break;
            case MAXPOOL:         forward_maxpool_layer(l, state);         break;
            case AVGPOOL:         forward_avgpool_layer(l, state);         break;
            case DROPOUT:         forward_dropout_layer(l, state);         break;
            /* The route layer takes the whole network, not the state. */
            case ROUTE:           forward_route_layer(l, net);             break;
            default:              break;
        }
        state.input = l.output;
    }
}
/*
 * Builds and returns a DECONVOLUTIONAL layer.
 *
 * batch            number of images per batch
 * h, w, c          input height, width and channels
 * n                number of filters (also the output channel count)
 * size, stride     filter size and stride
 * padding          padding, stored in l.pad
 * activation       activation stored in l.activation
 * batch_normalize  non-zero to allocate the batch-norm buffers
 * adam             non-zero to allocate the Adam moment buffers
 *
 * Weights are drawn as .02 * rand_normal() and then scaled by
 * (out_w*out_h)/(w*h); biases start at 0 and batch-norm scales at 1.
 * The output extent per axis is (in - 1)*stride + size - 2*pad.
 * Allocation results are not checked. A summary line is printed to stderr.
 */
layer make_deconvolutional_layer(int batch, int h, int w, int c, int n,
		int size, int stride, int padding, ACTIVATION activation,
		int batch_normalize, int adam) {
	const int nweights = c * n * size * size;
	int i;
	layer l = { 0 };

	/* Geometry and bookkeeping. */
	l.type = DECONVOLUTIONAL;
	l.h = h;
	l.w = w;
	l.c = c;
	l.n = n;
	l.batch = batch;
	l.stride = stride;
	l.size = size;
	l.nweights = nweights;
	l.nbiases = n;

	/* Parameters and their update buffers. */
	l.weights = calloc(nweights, sizeof(real_t));
	l.weight_updates = calloc(nweights, sizeof(real_t));
	l.biases = calloc(n, sizeof(real_t));
	l.bias_updates = calloc(n, sizeof(real_t));

	real_t scale = .02;
	for (i = 0; i < nweights; ++i) {
		l.weights[i] = scale * rand_normal();
	}
	for (i = 0; i < n; ++i) {
		l.biases[i] = 0;
	}

	/* Output shape. */
	l.pad = padding;
	l.out_h = (l.h - 1) * l.stride + l.size - 2 * l.pad;
	l.out_w = (l.w - 1) * l.stride + l.size - 2 * l.pad;
	l.out_c = n;
	l.outputs = l.out_w * l.out_h * l.out_c;
	l.inputs = l.w * l.h * l.c;

	/* Rescale the initial weights by the output/input area ratio. */
	scal_cpu(l.nweights, (real_t) l.out_w * l.out_h / (l.w * l.h), l.weights,
			1);

	l.output = calloc(l.batch * l.outputs, sizeof(real_t));
	l.delta = calloc(l.batch * l.outputs, sizeof(real_t));

	l.forward = forward_deconvolutional_layer;
	l.backward = backward_deconvolutional_layer;
	l.update = update_deconvolutional_layer;

	l.batch_normalize = batch_normalize;
	if (batch_normalize) {
		l.scales = calloc(n, sizeof(real_t));
		l.scale_updates = calloc(n, sizeof(real_t));
		for (i = 0; i < n; ++i) {
			l.scales[i] = 1;
		}

		l.mean = calloc(n, sizeof(real_t));
		l.variance = calloc(n, sizeof(real_t));
		l.mean_delta = calloc(n, sizeof(real_t));
		l.variance_delta = calloc(n, sizeof(real_t));
		l.rolling_mean = calloc(n, sizeof(real_t));
		l.rolling_variance = calloc(n, sizeof(real_t));

		l.x = calloc(l.batch * l.outputs, sizeof(real_t));
		l.x_norm = calloc(l.batch * l.outputs, sizeof(real_t));
	}

	if (adam) {
		/* First/second moment buffers for weights, biases and scales. */
		l.m = calloc(nweights, sizeof(real_t));
		l.v = calloc(nweights, sizeof(real_t));
		l.bias_m = calloc(n, sizeof(real_t));
		l.scale_m = calloc(n, sizeof(real_t));
		l.bias_v = calloc(n, sizeof(real_t));
		l.scale_v = calloc(n, sizeof(real_t));
	}

#ifdef GPU
	l.forward_gpu = forward_deconvolutional_layer_gpu;
	l.backward_gpu = backward_deconvolutional_layer_gpu;
	l.update_gpu = update_deconvolutional_layer_gpu;

	if(gpu_index >= 0) {
		const int out_count = l.batch*l.out_h*l.out_w*n;

		if (adam) {
			l.m_gpu = cuda_make_array(l.m, nweights);
			l.v_gpu = cuda_make_array(l.v, nweights);
			l.bias_m_gpu = cuda_make_array(l.bias_m, n);
			l.bias_v_gpu = cuda_make_array(l.bias_v, n);
			l.scale_m_gpu = cuda_make_array(l.scale_m, n);
			l.scale_v_gpu = cuda_make_array(l.scale_v, n);
		}

		l.weights_gpu = cuda_make_array(l.weights, nweights);
		l.weight_updates_gpu = cuda_make_array(l.weight_updates, nweights);

		l.biases_gpu = cuda_make_array(l.biases, n);
		l.bias_updates_gpu = cuda_make_array(l.bias_updates, n);

		l.delta_gpu = cuda_make_array(l.delta, out_count);
		l.output_gpu = cuda_make_array(l.output, out_count);

		if(batch_normalize) {
			l.mean_gpu = cuda_make_array(0, n);
			l.variance_gpu = cuda_make_array(0, n);

			l.rolling_mean_gpu = cuda_make_array(0, n);
			l.rolling_variance_gpu = cuda_make_array(0, n);

			l.mean_delta_gpu = cuda_make_array(0, n);
			l.variance_delta_gpu = cuda_make_array(0, n);

			l.scales_gpu = cuda_make_array(l.scales, n);
			l.scale_updates_gpu = cuda_make_array(0, n);

			l.x_gpu = cuda_make_array(0, out_count);
			l.x_norm_gpu = cuda_make_array(0, out_count);
		}
	}
#ifdef CUDNN
	cudnnCreateTensorDescriptor(&l.dstTensorDesc);
	cudnnCreateTensorDescriptor(&l.normTensorDesc);
	cudnnSetTensor4dDescriptor(l.dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l.batch, l.out_c, l.out_h, l.out_w);
	cudnnSetTensor4dDescriptor(l.normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l.out_c, 1, 1);
#endif
#endif

	/* The workspace size is computed from the fully populated layer. */
	l.activation = activation;
	l.workspace_size = get_workspace_size(l);

	fprintf(stderr,
			"deconv%5d %2d x%2d /%2d  %4d x%4d x%4d   ->  %4d x%4d x%4d\n", n,
			size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c);

	return l;
}
/*
 * Forward pass of the instance-segmentation layer: builds l.delta from the
 * network input and the ground truth in net.truth, and stores the cost.
 *
 * Output layout per batch item (as indexed below): l.classes class planes of
 * l.w*l.h values, followed by `ids` (= l.extra) embedding planes.
 *
 * Truth layout per batch item (as indexed below): up to 90 instance records
 * of l.w*l.h + 1 values each; the first value is the class id (a negative id
 * ends the list), the remaining l.w*l.h values are the per-pixel mask.
 *
 * *l.cost is set to the square of mag_array over the whole delta buffer, and
 * the elapsed time is printed.
 */
void forward_iseg_layer(const layer l, network net) {

	double time = what_time_is_it_now();
	int i, b, j, k;
	// number of embedding planes that follow the class planes
	int ids = l.extra;
	// start from a copy of the input and a cleared delta
	memcpy(l.output, net.input, l.outputs * l.batch * sizeof(real_t));
	memset(l.delta, 0, l.outputs * l.batch * sizeof(real_t));

#ifndef GPU
	// non-GPU build only: logistic activation on the class planes
	for (b = 0; b < l.batch; ++b) {
		int index = b * l.outputs;
		activate_array(l.output + index, l.classes * l.w * l.h, LOGISTIC);
	}
#endif

	for (b = 0; b < l.batch; ++b) {
		// a priori, each pixel has no class
		for (i = 0; i < l.classes; ++i) {
			for (k = 0; k < l.w * l.h; ++k) {
				int index = b * l.outputs + i * l.w * l.h + k;
				l.delta[index] = 0 - l.output[index];
			}
		}

		// a priori, embedding should be small magnitude
		for (i = 0; i < ids; ++i) {
			for (k = 0; k < l.w * l.h; ++k) {
				int index = b * l.outputs + (i + l.classes) * l.w * l.h + k;
				l.delta[index] = .1 * (0 - l.output[index]);
			}
		}

		// per-instance pixel counts and embedding sums for this batch item
		memset(l.counts, 0, 90 * sizeof(int));
		for (i = 0; i < 90; ++i) {
			fill_cpu(ids, 0, l.sums[i], 1);

			// class id of instance i; a negative id ends the instance list
			int c = net.truth[b * l.truths + i * (l.w * l.h + 1)];
			if (c < 0)
				break;
			// add up metric embeddings for each instance
			for (k = 0; k < l.w * l.h; ++k) {
				int index = b * l.outputs + c * l.w * l.h + k;
				real_t v = net.truth[b * l.truths + i * (l.w * l.h + 1) + 1 + k];
				if (v) {
					// class delta for masked pixels is truth minus prediction
					l.delta[index] = v - l.output[index];
					// the stride l.w * l.h walks across the embedding planes at pixel k
					axpy_cpu(ids, 1,
							l.output + b * l.outputs + l.classes * l.w * l.h
									+ k, l.w * l.h, l.sums[i], 1);
					++l.counts[i];
				}
			}
		}

		// per-instance mean squared distance to the mean embedding
		// (only used for the printout below)
		real_t *mse = calloc(90, sizeof(real_t));
		for (i = 0; i < 90; ++i) {
			int c = net.truth[b * l.truths + i * (l.w * l.h + 1)];
			if (c < 0)
				break;
			for (k = 0; k < l.w * l.h; ++k) {
				real_t v = net.truth[b * l.truths + i * (l.w * l.h + 1) + 1 + k];
				if (v) {
					int z;
					real_t sum = 0;
					for (z = 0; z < ids; ++z) {
						int index = b * l.outputs + (l.classes + z) * l.w * l.h
								+ k;
						// l.sums[i] still holds the sum here, hence the division
						sum += pow(l.sums[i][z] / l.counts[i] - l.output[index],
								2);
					}
					mse[i] += sum;
				}
			}
			// NOTE(review): l.counts[i] can be 0 here for an instance with an
			// empty mask; the result is only printed when the count is non-zero.
			mse[i] /= l.counts[i];
		}

		// Calculate average embedding
		for (i = 0; i < 90; ++i) {
			if (!l.counts[i])
				continue;
			scal_cpu(ids, 1.f / l.counts[i], l.sums[i], 1);
			// diagnostics for the first batch item, printed when net.gpu_index is 0
			if (b == 0 && net.gpu_index == 0) {
				printf("%4d, %6.3f, ", l.counts[i], mse[i]);
				for (j = 0; j < ids; ++j) {
					printf("%6.3f,", l.sums[i][j]);
				}
				printf("\n");
			}
		}
		free(mse);

		// Calculate embedding loss
		for (i = 0; i < 90; ++i) {
			if (!l.counts[i])
				continue;
			for (k = 0; k < l.w * l.h; ++k) {
				real_t v = net.truth[b * l.truths + i * (l.w * l.h + 1) + 1 + k];
				if (v) {
					for (j = 0; j < 90; ++j) {
						if (!l.counts[j])
							continue;
						int z;
						for (z = 0; z < ids; ++z) {
							int index = b * l.outputs
									+ (l.classes + z) * l.w * l.h + k;
							real_t diff = l.sums[j][z] - l.output[index];
							// fixed-size step (+/-.1) toward the pixel's own
							// instance mean (j == i), away from every other mean
							if (j == i)
								l.delta[index] += diff < 0 ? -.1 : .1;
							else
								l.delta[index] += -(diff < 0 ? -.1 : .1);
						}
					}
				}
			}
		}

		// scale down the embedding part of the delta
		for (i = 0; i < ids; ++i) {
			for (k = 0; k < l.w * l.h; ++k) {
				int index = b * l.outputs + (i + l.classes) * l.w * l.h + k;
				l.delta[index] *= .01;
			}
		}
	}

	*(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2);
	printf("took %lf sec\n", what_time_is_it_now() - time);
}
Exemple #22
0
/*
 * Maps a typed column letter and row number to an index into the 19x19
 * board, using the same conventions as the move listing printed by test_go:
 * with `inverted` set the typed row counts from the other edge, and with
 * `noi` set the letter 'I' is skipped in the column lettering.
 * Returns -1 when the coordinate does not lie on the board.
 */
static int go_cell_index(char column, int row_number)
{
    int row = (inverted) ? 19 - row_number : row_number - 1;
    int col = column - 'A';
    if (col > 7 && noi) col -= 1;
    if (row < 0 || row >= 19 || col < 0 || col >= 19) return -1;
    return row*19 + col;
}

/*
 * Interactive Go move suggester.
 *
 * Loads the network from `filename` (and `weightfile`, if given), then loops:
 * predict move scores for the current board (averaged over 8 rotated/flipped
 * views when `multi` is set), zero the scores of occupied cells, print the
 * board and the top `nind` candidates, and read one line from stdin:
 *   - empty line or a number k : play the k-th listed candidate (default 1)
 *   - "<col> <row>"            : play at that coordinate
 *   - "p"                      : pass
 *   - "b <col> <row>" / "w ..."  : place a stone of the given colour
 *   - "c <col> <row>"          : clear that cell
 * After each line the board is updated, flipped, and the colour switches.
 *
 * The loop ends when no further line can be read. Candidate numbers outside
 * 1..nind and coordinates outside the board are ignored; earlier they
 * indexed the candidate list / board out of bounds.
 */
void test_go(char *filename, char *weightfile, int multi)
{
    network net = parse_network_cfg(filename);
    if(weightfile){
        load_weights(&net, weightfile);
    }
    srand(time(0));
    set_batch_network(&net, 1);
    float *board = calloc(19*19, sizeof(float));
    float *move = calloc(19*19, sizeof(float));
    if(!board || !move){
        free(board);
        free(move);
        return;
    }
    int color = 1;
    while(1){
        /* Copy right away: later predictions may reuse the output buffer. */
        float *output = network_predict(net, board);
        fltcpy(move, output, 19 * 19);
        int i;
        if(multi){
            image bim = float_to_image(19, 19, 1, board);
            for(i = 1; i < 8; ++i){
                rotate_image_cw(bim, i);
                if(i >= 4) flip_image(bim);

                float *view = network_predict(net, board);
                image oim = float_to_image(19, 19, 1, view);

                /* Undo the transform on the prediction before accumulating. */
                if(i >= 4) flip_image(oim);
                rotate_image_cw(oim, -i);

                fltadd(move, view, 19 * 19);

                /* Put the board back the way it was. */
                if(i >= 4) flip_image(bim);
                rotate_image_cw(bim, -i);
            }
            scal_cpu(19*19, 1./8., move, 1);
        }
        for(i = 0; i < 19*19; ++i){
            if(board[i]) move[i] = 0;
        }

        int indexes[nind];
        int row, col;
        top_k(move, 19*19, nind, indexes);
        print_board(board, color, indexes);
        for(i = 0; i < nind; ++i){
            int index = indexes[i];
            row = index / 19;
            col = index % 19;
            printf("%d: %c %d, %.2f%%\n", i+1, col + 'A' + 1*(col > 7 && noi), (inverted)?19 - row : row+1, move[index]*100);
        }
        if(color == 1) printf("\u25EF Enter move: ");
        else printf("\u25C9 Enter move: ");

        char *line = fgetl(stdin);
        if(!line){
            /* No more input: leave the loop instead of parsing a NULL line. */
            break;
        }

        char c;
        int picked = 1;
        int dnum = sscanf(line, "%d", &picked);
        int cnum = sscanf(line, "%c", &c);
        if (strlen(line) == 0 || dnum) {
            /* A non-zero dnum includes EOF (blank line): `picked` keeps its default of 1. */
            --picked;
            if (picked >= 0 && picked < nind){
                int index = indexes[picked];
                row = index / 19;
                col = index % 19;
                board[row*19 + col] = 1;
            }
        } else if (cnum == 1){
            int typed_row;
            if (c <= 'T' && c >= 'A'){
                int num = sscanf(line, "%c %d", &c, &typed_row);
                if (num == 2){
                    int cell = go_cell_index(c, typed_row);
                    if (cell >= 0) board[cell] = 1;
                }
            } else if (c == 'p') {
                // Pass
            } else if(c=='b' || c == 'w'){
                char g;
                int num = sscanf(line, "%c %c %d", &g, &c, &typed_row);
                if (num == 3){
                    int cell = go_cell_index(c, typed_row);
                    if (cell >= 0) board[cell] = (g == 'b') ? color : -color;
                }
            } else if(c == 'c'){
                char g;
                int num = sscanf(line, "%c %c %d", &g, &c, &typed_row);
                if (num == 3){
                    int cell = go_cell_index(c, typed_row);
                    if (cell >= 0) board[cell] = 0;
                }
            }
        }
        free(line);
        update_board(board);
        flip_board(board);
        color = -color;
    }

    free(board);
    free(move);
}