示例#1
0
void forward_connected_layer(connected_layer l, network_state state)
{
    int i;
    fill_cpu(l.outputs*l.batch, 0, l.output, 1);
    int m = l.batch;
    int k = l.inputs;
    int n = l.outputs;
    float *a = state.input;
    float *b = l.weights;
    float *c = l.output;
    gemm(0,1,m,n,k,1,a,k,b,k,1,c,n);
    if(l.batch_normalize){
        if(state.train){
            mean_cpu(l.output, l.batch, l.outputs, 1, l.mean);
            variance_cpu(l.output, l.mean, l.batch, l.outputs, 1, l.variance);

            scal_cpu(l.outputs, .95, l.rolling_mean, 1);
            axpy_cpu(l.outputs, .05, l.mean, 1, l.rolling_mean, 1);
            scal_cpu(l.outputs, .95, l.rolling_variance, 1);
            axpy_cpu(l.outputs, .05, l.variance, 1, l.rolling_variance, 1);

            copy_cpu(l.outputs*l.batch, l.output, 1, l.x, 1);
            normalize_cpu(l.output, l.mean, l.variance, l.batch, l.outputs, 1);   
            copy_cpu(l.outputs*l.batch, l.output, 1, l.x_norm, 1);
        } else {
            normalize_cpu(l.output, l.rolling_mean, l.rolling_variance, l.batch, l.outputs, 1);
        }
        scale_bias(l.output, l.scales, l.batch, l.outputs, 1);
    }
    for(i = 0; i < l.batch; ++i){
        axpy_cpu(l.outputs, 1, l.biases, 1, l.output + i*l.outputs, 1);
    }
    activate_array(l.output, l.outputs*l.batch, l.activation);
}
示例#2
0
文件: cifar.c 项目: Darzu/darknet
void test_cifar_multi(char *filename, char *weightfile)
{
    network net = parse_network_cfg(filename);
    if(weightfile){
        load_weights(&net, weightfile);
    }
    set_batch_network(&net, 1);
    srand(time(0));

    float avg_acc = 0;
    data test = load_cifar10_data("data/cifar/cifar-10-batches-bin/test_batch.bin");

    int i;
    for(i = 0; i < test.X.rows; ++i){
        image im = float_to_image(32, 32, 3, test.X.vals[i]);

        float pred[10] = {0};

        float *p = network_predict(net, im.data);
        axpy_cpu(10, 1, p, 1, pred, 1);
        flip_image(im);
        p = network_predict(net, im.data);
        axpy_cpu(10, 1, p, 1, pred, 1);

        int index = max_index(pred, 10);
        int class = max_index(test.y.vals[i], 10);
        if(index == class) avg_acc += 1;
        free_image(im);
        printf("%4d: %.2f%%\n", i, 100.*avg_acc/(i+1));
    }
}
示例#3
0
void forward_batchnorm_layer(layer l, network_state state)
{
    if(l.type == BATCHNORM) copy_cpu(l.outputs*l.batch, state.input, 1, l.output, 1);
    if(l.type == CONNECTED){
        l.out_c = l.outputs;
        l.out_h = l.out_w = 1;
    }
    if(state.train){
        mean_cpu(l.output, l.batch, l.out_c, l.out_h*l.out_w, l.mean);
        variance_cpu(l.output, l.mean, l.batch, l.out_c, l.out_h*l.out_w, l.variance);

        scal_cpu(l.out_c, .99, l.rolling_mean, 1);
        axpy_cpu(l.out_c, .01, l.mean, 1, l.rolling_mean, 1);
        scal_cpu(l.out_c, .99, l.rolling_variance, 1);
        axpy_cpu(l.out_c, .01, l.variance, 1, l.rolling_variance, 1);

        copy_cpu(l.outputs*l.batch, l.output, 1, l.x, 1);
        normalize_cpu(l.output, l.mean, l.variance, l.batch, l.out_c, l.out_h*l.out_w);   
        copy_cpu(l.outputs*l.batch, l.output, 1, l.x_norm, 1);
    } else {
        normalize_cpu(l.output, l.rolling_mean, l.rolling_variance, l.batch, l.out_c, l.out_h*l.out_w);
    }
    scale_bias(l.output, l.scales, l.batch, l.out_c, l.out_h*l.out_w);
    add_bias(l.output, l.biases, l.batch, l.out_c, l.out_h*l.out_w);
}
示例#4
0
void update_connected_layer(connected_layer l, int batch, float learning_rate, float momentum, float decay)
{
    axpy_cpu(l.outputs, learning_rate/batch, l.bias_updates, 1, l.biases, 1);
    scal_cpu(l.outputs, momentum, l.bias_updates, 1);

    axpy_cpu(l.inputs*l.outputs, -decay*batch, l.weights, 1, l.weight_updates, 1);
    axpy_cpu(l.inputs*l.outputs, learning_rate/batch, l.weight_updates, 1, l.weights, 1);
    scal_cpu(l.inputs*l.outputs, momentum, l.weight_updates, 1);
}
void update_convolutional_layer(convolutional_layer l, int batch, float learning_rate, float momentum, float decay)
{
    int size = l.size*l.size*l.c*l.n;
    axpy_cpu(l.n, learning_rate/batch, l.bias_updates, 1, l.biases, 1);
    scal_cpu(l.n, momentum, l.bias_updates, 1);

    axpy_cpu(size, -decay*batch, l.filters, 1, l.filter_updates, 1);
    axpy_cpu(size, learning_rate/batch, l.filter_updates, 1, l.filters, 1);
    scal_cpu(size, momentum, l.filter_updates, 1);
}
示例#6
0
文件: lsd.c 项目: kunle12/darknet
void slerp(float *start, float *end, float s, int n, float *out)
{
    float omega = acos(dot_cpu(n, start, 1, end, 1));
    float so = sin(omega);
    fill_cpu(n, 0, out, 1);
    axpy_cpu(n, sin((1-s)*omega)/so, start, 1, out, 1);
    axpy_cpu(n, sin(s*omega)/so, end, 1, out, 1);

    float mag = mag_array(out, n);
    scale_array(out, n, 1./mag);
}
void update_local_layer(local_layer l, int batch, float learning_rate,
		float momentum, float decay) {
	int locations = l.out_w * l.out_h;
	int size = l.size * l.size * l.c * l.n * locations;
	axpy_cpu(l.outputs, learning_rate / batch, l.bias_updates, 1, l.biases, 1);
	scal_cpu(l.outputs, momentum, l.bias_updates, 1);

	axpy_cpu(size, -decay * batch, l.weights, 1, l.weight_updates, 1);
	axpy_cpu(size, learning_rate / batch, l.weight_updates, 1, l.weights, 1);
	scal_cpu(size, momentum, l.weight_updates, 1);
}
void backward_rnn_layer(layer l, network_state state) {
	network_state s = { 0 };
	s.train = state.train;
	int i;
	layer input_layer = *(l.input_layer);
	layer self_layer = *(l.self_layer);
	layer output_layer = *(l.output_layer);

	increment_layer(&input_layer, l.steps - 1);
	increment_layer(&self_layer, l.steps - 1);
	increment_layer(&output_layer, l.steps - 1);

	l.state += l.hidden * l.batch * l.steps;
	for (i = l.steps - 1; i >= 0; --i) {
		copy_cpu(l.hidden * l.batch, input_layer.output, 1, l.state, 1);
		axpy_cpu(l.hidden * l.batch, 1, self_layer.output, 1, l.state, 1);

		s.input = l.state;
		s.delta = self_layer.delta;
		backward_connected_layer(output_layer, s);

		l.state -= l.hidden * l.batch;
		/*
		 if(i > 0){
		 copy_cpu(l.hidden * l.batch, input_layer.output - l.hidden*l.batch, 1, l.state, 1);
		 axpy_cpu(l.hidden * l.batch, 1, self_layer.output - l.hidden*l.batch, 1, l.state, 1);
		 }else{
		 fill_cpu(l.hidden * l.batch, 0, l.state, 1);
		 }
		 */

		s.input = l.state;
		s.delta = self_layer.delta - l.hidden * l.batch;
		if (i == 0)
			s.delta = 0;
		backward_connected_layer(self_layer, s);

		copy_cpu(l.hidden * l.batch, self_layer.delta, 1, input_layer.delta, 1);
		if (i > 0 && l.shortcut)
			axpy_cpu(l.hidden * l.batch, 1, self_layer.delta, 1,
					self_layer.delta - l.hidden * l.batch, 1);
		s.input = state.input + i * l.inputs * l.batch;
		if (state.delta)
			s.delta = state.delta + i * l.inputs * l.batch;
		else
			s.delta = 0;
		backward_connected_layer(input_layer, s);

		increment_layer(&input_layer, -1);
		increment_layer(&self_layer, -1);
		increment_layer(&output_layer, -1);
	}
}
示例#9
0
void merge_updates(layer l, layer base)
{
    if (l.type == CONVOLUTIONAL) {
        axpy_cpu(l.n, 1, l.bias_updates, 1, base.bias_updates, 1);
        axpy_cpu(l.n*l.size*l.size*l.c, 1, l.weight_updates, 1, base.weight_updates, 1);
        if (l.scale_updates) {
            axpy_cpu(l.n, 1, l.scale_updates, 1, base.scale_updates, 1);
        }
    } else if(l.type == CONNECTED) {
        axpy_cpu(l.outputs, 1, l.bias_updates, 1, base.bias_updates, 1);
        axpy_cpu(l.outputs*l.inputs, 1, l.weight_updates, 1, base.weight_updates, 1);
    }
}
示例#10
0
void update_deconvolutional_layer(layer l, int batch, float learning_rate, float momentum, float decay)
{
    int size = l.size*l.size*l.c*l.n;
    axpy_cpu(l.n, learning_rate/batch, l.bias_updates, 1, l.biases, 1);
    scal_cpu(l.n, momentum, l.bias_updates, 1);

    if(l.scales){
        axpy_cpu(l.n, learning_rate/batch, l.scale_updates, 1, l.scales, 1);
        scal_cpu(l.n, momentum, l.scale_updates, 1);
    }

    axpy_cpu(size, -decay*batch, l.weights, 1, l.weight_updates, 1);
    axpy_cpu(size, learning_rate/batch, l.weight_updates, 1, l.weights, 1);
    scal_cpu(size, momentum, l.weight_updates, 1);
}
示例#11
0
void backward_connected_layer(connected_layer l, network_state state)
{
    int i;
    gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);
    for(i = 0; i < l.batch; ++i){
        axpy_cpu(l.outputs, 1, l.delta + i*l.outputs, 1, l.bias_updates, 1);
    }
    int m = l.outputs;
    int k = l.batch;
    int n = l.inputs;
    float *a = l.delta;
    float *b = state.input;
    float *c = l.weight_updates;
    gemm(1,0,m,n,k,1,a,m,b,n,1,c,n);

    m = l.batch;
    k = l.outputs;
    n = l.inputs;

    a = l.delta;
    b = l.weights;
    c = state.delta;

    if(c) gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
}
示例#12
0
void read_and_add_into(int fd, float *a, int n)
{
    float *buff = calloc(n, sizeof(float));
    read_all(fd, (char*) buff, n*sizeof(float));
    axpy_cpu(n, 1, buff, 1, a, 1);
    free(buff);
}
示例#13
0
void backward_connected_layer(connected_layer l, network_state state)
{
    int i;
    gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);
    for(i = 0; i < l.batch; ++i){
        axpy_cpu(l.outputs, 1, l.delta + i*l.outputs, 1, l.bias_updates, 1);
    }
    if(l.batch_normalize){
        backward_scale_cpu(l.x_norm, l.delta, l.batch, l.outputs, 1, l.scale_updates);

        scale_bias(l.delta, l.scales, l.batch, l.outputs, 1);

        mean_delta_cpu(l.delta, l.variance, l.batch, l.outputs, 1, l.mean_delta);
        variance_delta_cpu(l.x, l.delta, l.mean, l.variance, l.batch, l.outputs, 1, l.variance_delta);
        normalize_delta_cpu(l.x, l.mean, l.variance, l.mean_delta, l.variance_delta, l.batch, l.outputs, 1, l.delta);
    }

    int m = l.outputs;
    int k = l.batch;
    int n = l.inputs;
    float *a = l.delta;
    float *b = state.input;
    float *c = l.weight_updates;
    gemm(1,0,m,n,k,1,a,m,b,n,1,c,n);

    m = l.batch;
    k = l.outputs;
    n = l.inputs;

    a = l.delta;
    b = l.weights;
    c = state.delta;

    if(c) gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
}
示例#14
0
void predict_move(network net, float *board, float *move, int multi)
{
    float *output = network_predict(net, board);
    copy_cpu(19*19, output, 1, move, 1);
    int i;
    if(multi){
        image bim = float_to_image(19, 19, 1, board);
        for(i = 1; i < 8; ++i){
            rotate_image_cw(bim, i);
            if(i >= 4) flip_image(bim);

            float *output = network_predict(net, board);
            image oim = float_to_image(19, 19, 1, output);

            if(i >= 4) flip_image(oim);
            rotate_image_cw(oim, -i);

            axpy_cpu(19*19, 1, output, 1, move, 1);

            if(i >= 4) flip_image(bim);
            rotate_image_cw(bim, -i);
        }
        scal_cpu(19*19, 1./8., move, 1);
    }
    for(i = 0; i < 19*19; ++i){
        if(board[i]) move[i] = 0;
    }
}
示例#15
0
void test_mnist_multi(char *filename, char *weightfile)
{
    network net = parse_network_cfg(filename);
    if(weightfile){
        load_weights(&net, weightfile);
    }
    set_batch_network(&net, 1);
    srand(time(0));

    float avg_acc = 0;
    data test;
    test = load_mnist_data("data/mnist/t10k-images.idx3-ubyte", "data/mnist/t10k-labels.idx1-ubyte", 10000);

    int i;
    for(i = 0; i < test.X.rows; ++i){
        image im = float_to_image(28, 28, 1, test.X.vals[i]);

        float pred[10] = {0};

        float *p = network_predict(net, im.data);
        axpy_cpu(10, 1, p, 1, pred, 1);
      //  flip_image(im);
        image im1 = rotate_image(im, -2.0*3.1415926/180.0);
        image im2 = rotate_image(im, 2.0*3.1415926/180.0);
        image im3 = rotate_image(im, -3.0*3.1415926/180.0);
        image im4 = rotate_image(im, 3.0*3.1415926/180.0);
        p = network_predict(net, im1.data);
        axpy_cpu(10, 1, p, 1, pred, 1);
        p = network_predict(net, im2.data);
        axpy_cpu(10, 1, p, 1, pred, 1);
        p = network_predict(net, im3.data);
        axpy_cpu(10, 1, p, 1, pred, 1);
        p = network_predict(net, im4.data);
        axpy_cpu(10, 1, p, 1, pred, 1);

        int index = max_index(pred, 10);
        int class = max_index(test.y.vals[i], 10);
        if(index == class) avg_acc += 1;
        free_image(im);
        free_image(im1);
        free_image(im2);
        free_image(im3);
        free_image(im4);
        printf("%4d: %.2f%%\n", i, 100.*avg_acc/(i+1));
    }
    printf("%4d: %.2f%%\n", i, 100.*avg_acc/(i+1));
}
示例#16
0
void average(int argc, char *argv[])
{
    char *cfgfile = argv[2];
    char *outfile = argv[3];
    gpu_index = -1;
    network net = parse_network_cfg(cfgfile);
    network sum = parse_network_cfg(cfgfile);

    char *weightfile = argv[4];   
    load_weights(&sum, weightfile);

    int i, j;
    int n = argc - 5;
    for(i = 0; i < n; ++i){
        weightfile = argv[i+5];   
        load_weights(&net, weightfile);
        for(j = 0; j < net.n; ++j){
            layer l = net.layers[j];
            layer out = sum.layers[j];
            if(l.type == CONVOLUTIONAL){
                int num = l.n*l.c*l.size*l.size;
                axpy_cpu(l.n, 1, l.biases, 1, out.biases, 1);
                axpy_cpu(num, 1, l.filters, 1, out.filters, 1);
            }
            if(l.type == CONNECTED){
                axpy_cpu(l.outputs, 1, l.biases, 1, out.biases, 1);
                axpy_cpu(l.outputs*l.inputs, 1, l.weights, 1, out.weights, 1);
            }
        }
    }
    n = n+1;
    for(j = 0; j < net.n; ++j){
        layer l = sum.layers[j];
        if(l.type == CONVOLUTIONAL){
            int num = l.n*l.c*l.size*l.size;
            scal_cpu(l.n, 1./n, l.biases, 1);
            scal_cpu(num, 1./n, l.filters, 1);
        }
        if(l.type == CONNECTED){
            scal_cpu(l.outputs, 1./n, l.biases, 1);
            scal_cpu(l.outputs*l.inputs, 1./n, l.weights, 1);
        }
    }
    save_weights(sum, outfile);
}
示例#17
0
int main()
{
	int N, N2;
	printf(" \n Input matrix size N x N, N = ");
	scanf("%d", &N);
	printf(" N = %d \n \n", N);
	N2 = N*N;

	double *A, *B, *C_cpu, *C_gpu, *D_cpu, *D_gpu, t1, t2, cpu_time, gpu_time;
	double r_cpu, *r_gpu, nrmC_cpu, *nrmC_gpu;

	A = (double *) malloc(N2*sizeof(double));
	B = (double *) malloc(N2*sizeof(double));
	C_cpu = (double *) malloc(N2*sizeof(double));
	C_gpu = (double *) malloc(N2*sizeof(double));
	D_cpu = (double *) malloc(N2*sizeof(double));
	D_gpu = (double *) malloc(N2*sizeof(double));

	r_gpu = (double *) malloc(1*sizeof(double));
	nrmC_gpu = (double *) malloc(1*sizeof(double));

	initial(A, B, N);

	t1 = clock();

	#pragma acc data copyin(A[0:N2], B[0:N2]) copyout(C_cpu[0:N2])
	{
		cublas_gemm(A, B, C_cpu, N);
	}
	r_cpu = dot_cpu(C_cpu, B, N2);
	axpy_cpu(-1.0*r_cpu, B, C_cpu, N2);
	nrmC_cpu = norm_cpu(C_cpu, N2);
	copy_cpu(C_cpu, D_cpu, N2);
	scal_cpu(1.0/nrmC_cpu, D_cpu, N2);

	t2 = clock();
	cpu_time = 1.0*(t2 - t1)/CLOCKS_PER_SEC;

	t1 = clock();

	#pragma acc enter data copyin(A[0:N2], B[0:N2]) create(C_gpu[0:N2], r_gpu[0], nrmC_gpu[0], D_gpu[0:N2])
	{
		gpu_cublas1(A, B, C_gpu, D_gpu, r_gpu, nrmC_gpu, N, N2);
	}
	#pragma acc update host(D_gpu[0:N2])

	t2 = clock();
	gpu_time = 1.0*(t2 - t1)/CLOCKS_PER_SEC;
	printf(" gpu part success \n");


	printf(" \n error = %f \n", error(D_cpu, D_gpu, N2));
	printf(" gpu time = %f, cpu times = %f \n", gpu_time, cpu_time);

	return 0;
}
示例#18
0
void update_convolutional_layer(convolutional_layer l, update_args a)
{
    float learning_rate = a.learning_rate*l.learning_rate_scale;
    float momentum = a.momentum;
    float decay = a.decay;
    int batch = a.batch;

    axpy_cpu(l.n, learning_rate/batch, l.bias_updates, 1, l.biases, 1);
    scal_cpu(l.n, momentum, l.bias_updates, 1);

    if(l.scales){
        axpy_cpu(l.n, learning_rate/batch, l.scale_updates, 1, l.scales, 1);
        scal_cpu(l.n, momentum, l.scale_updates, 1);
    }

    axpy_cpu(l.nweights, -decay*batch, l.weights, 1, l.weight_updates, 1);
    axpy_cpu(l.nweights, learning_rate/batch, l.weight_updates, 1, l.weights, 1);
    scal_cpu(l.nweights, momentum, l.weight_updates, 1);
}
void update_deconvolutional_layer(layer l, update_args a) {
	real_t learning_rate = a.learning_rate * l.learning_rate_scale;
	real_t momentum = a.momentum;
	real_t decay = a.decay;
	int batch = a.batch;

	int size = l.size * l.size * l.c * l.n;
	axpy_cpu(l.n, learning_rate / batch, l.bias_updates, 1, l.biases, 1);
	scal_cpu(l.n, momentum, l.bias_updates, 1);

	if (l.scales) {
		axpy_cpu(l.n, learning_rate / batch, l.scale_updates, 1, l.scales, 1);
		scal_cpu(l.n, momentum, l.scale_updates, 1);
	}

	axpy_cpu(size, -decay * batch, l.weights, 1, l.weight_updates, 1);
	axpy_cpu(size, learning_rate / batch, l.weight_updates, 1, l.weights, 1);
	scal_cpu(size, momentum, l.weight_updates, 1);
}
示例#20
0
float cuda_compare(float *x_gpu, float *x, size_t n, char *s) {
	float *tmp = calloc(n, sizeof(float));
	cuda_pull_array(x_gpu, tmp, n);
	//int i;
	//for(i = 0; i < n; ++i) printf("%f %f\n", tmp[i], x[i]);
	axpy_cpu(n, -1, x, 1, tmp, 1);
	float err = dot_cpu(n, tmp, 1, tmp, 1);
	printf("Error %s: %f\n", s, sqrt(err / n));
	free(tmp);
	return err;
}
示例#21
0
void reconstruct_picture(network net, float *features, image recon, image update, float rate, float momentum, float lambda, int smooth_size)
{
    scale_image(recon, 2);
    translate_image(recon, -1);

    image delta = make_image(recon.w, recon.h, recon.c);

    network_state state = {0};
#ifdef GPU
    state.input = cuda_make_array(recon.data, recon.w*recon.h*recon.c);
    state.delta = cuda_make_array(delta.data, delta.w*delta.h*delta.c);
    state.truth = cuda_make_array(features, get_network_output_size(net));

    forward_network_gpu(net, state);
    backward_network_gpu(net, state);

    cuda_pull_array(state.delta, delta.data, delta.w*delta.h*delta.c);

    cuda_free(state.input);
    cuda_free(state.delta);
    cuda_free(state.truth);
#else
    state.input = recon.data;
    state.delta = delta.data;
    state.truth = features;

    forward_network(net, state);
    backward_network(net, state);
#endif

    axpy_cpu(recon.w*recon.h*recon.c, 1, delta.data, 1, update.data, 1);
    smooth(recon, update, lambda, smooth_size);

    axpy_cpu(recon.w*recon.h*recon.c, rate, update.data, 1, recon.data, 1);
    scal_cpu(recon.w*recon.h*recon.c, momentum, update.data, 1);

    translate_image(recon, 1);
    scale_image(recon, .5);
    constrain_image(recon);
    free_image(delta);
}
void forward_rnn_layer(layer l, network_state state) {
	network_state s = { 0 };
	s.train = state.train;
	int i;
	layer input_layer = *(l.input_layer);
	layer self_layer = *(l.self_layer);
	layer output_layer = *(l.output_layer);

	fill_cpu(l.outputs * l.batch * l.steps, 0, output_layer.delta, 1);
	fill_cpu(l.hidden * l.batch * l.steps, 0, self_layer.delta, 1);
	fill_cpu(l.hidden * l.batch * l.steps, 0, input_layer.delta, 1);
	if (state.train)
		fill_cpu(l.hidden * l.batch, 0, l.state, 1);

	for (i = 0; i < l.steps; ++i) {
		s.input = state.input;
		forward_connected_layer(input_layer, s);

		s.input = l.state;
		forward_connected_layer(self_layer, s);

		float *old_state = l.state;
		if (state.train)
			l.state += l.hidden * l.batch;
		if (l.shortcut) {
			copy_cpu(l.hidden * l.batch, old_state, 1, l.state, 1);
		} else {
			fill_cpu(l.hidden * l.batch, 0, l.state, 1);
		}
		axpy_cpu(l.hidden * l.batch, 1, input_layer.output, 1, l.state, 1);
		axpy_cpu(l.hidden * l.batch, 1, self_layer.output, 1, l.state, 1);

		s.input = l.state;
		forward_connected_layer(output_layer, s);

		state.input += l.inputs * l.batch;
		increment_layer(&input_layer, 1);
		increment_layer(&self_layer, 1);
		increment_layer(&output_layer, 1);
	}
}
示例#23
0
void backward_compact_layer(const layer l, network_state state)
{
    gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);
    int i, b;
    for (b=0;b<l.batch;b++)
    {
        for (i=0;i<l.index;i++)
        {
            axpy_cpu(l.outputs, 1, l.delta+b*l.outputs, 1, state.delta+b*l.inputs+i*l.outputs, 1);
        }
    }
}
示例#24
0
文件: darknet.c 项目: kunle12/darknet
void average(int argc, char *argv[])
{
    char *cfgfile = argv[2];
    char *outfile = argv[3];
    gpu_index = -1;
    network *net = parse_network_cfg(cfgfile);
    network *sum = parse_network_cfg(cfgfile);

    char *weightfile = argv[4];
    load_weights(sum, weightfile);

    int i, j;
    int n = argc - 5;
    for(i = 0; i < n; ++i){
        weightfile = argv[i+5];
        load_weights(net, weightfile);
        for(j = 0; j < net->n; ++j){
            layer l = net->layers[j];
            layer out = sum->layers[j];
            if(l.type == CONVOLUTIONAL){
                int num = l.n*l.c*l.size*l.size;
                axpy_cpu(l.n, 1, l.biases, 1, out.biases, 1);
                axpy_cpu(num, 1, l.weights, 1, out.weights, 1);
                if(l.batch_normalize){
                    axpy_cpu(l.n, 1, l.scales, 1, out.scales, 1);
                    axpy_cpu(l.n, 1, l.rolling_mean, 1, out.rolling_mean, 1);
                    axpy_cpu(l.n, 1, l.rolling_variance, 1, out.rolling_variance, 1);
                }
            }
            if(l.type == CONNECTED){
                axpy_cpu(l.outputs, 1, l.biases, 1, out.biases, 1);
                axpy_cpu(l.outputs*l.inputs, 1, l.weights, 1, out.weights, 1);
            }
        }
    }
    n = n+1;
    for(j = 0; j < net->n; ++j){
        layer l = sum->layers[j];
        if(l.type == CONVOLUTIONAL){
            int num = l.n*l.c*l.size*l.size;
            scal_cpu(l.n, 1./n, l.biases, 1);
            scal_cpu(num, 1./n, l.weights, 1);
                if(l.batch_normalize){
                    scal_cpu(l.n, 1./n, l.scales, 1);
                    scal_cpu(l.n, 1./n, l.rolling_mean, 1);
                    scal_cpu(l.n, 1./n, l.rolling_variance, 1);
                }
        }
        if(l.type == CONNECTED){
            scal_cpu(l.outputs, 1./n, l.biases, 1);
            scal_cpu(l.outputs*l.inputs, 1./n, l.weights, 1);
        }
    }
    save_weights(sum, outfile);
}
示例#25
0
void backward_route_layer(const route_layer l, network_state state)
{
    int i, j;
    int offset = 0;
    for(i = 0; i < l.n; ++i){
        int index = l.input_layers[i];
        float *delta = state.net.layers[index].delta;
        int input_size = l.input_sizes[i];
        for(j = 0; j < l.batch; ++j){
            axpy_cpu(input_size, 1, l.delta + offset + j*l.outputs, 1, delta + j*input_size, 1);
        }
        offset += input_size;
    }
}
示例#26
0
void forward_cost_layer(cost_layer l, network_state state)
{
    if (!state.truth) return;
    if(l.cost_type == MASKED){
        int i;
        for(i = 0; i < l.batch*l.inputs; ++i){
            if(state.truth[i] == 0) state.input[i] = 0;
        }
    }
    copy_cpu(l.batch*l.inputs, state.truth, 1, l.delta, 1);
    axpy_cpu(l.batch*l.inputs, -1, state.input, 1, l.delta, 1);
    *(l.output) = dot_cpu(l.batch*l.inputs, l.delta, 1, l.delta, 1);
    //printf("cost: %f\n", *l.output);
}
示例#27
0
void forward_compact_layer(const layer l, network_state state)
{
    int i, b;
    for (b=0;b<l.batch;b++)
    {
        // copy first section
        copy_cpu(l.outputs, state.input+b*l.inputs, 1, l.output+b*l.outputs, 1);
        // add other splits
        for (i=1;i<l.index;i++)
        {
            axpy_cpu(l.outputs, 1, state.input+b*l.inputs+i*l.outputs, 1, l.output+b*l.outputs, 1);
        }
    }
    activate_array(l.output, l.outputs*l.batch, l.activation);
}
示例#28
0
void forward_cost_layer(cost_layer l, network_state state)
{
    if (!state.truth) return;
    if(l.cost_type == MASKED){
        int i;
        for(i = 0; i < l.batch*l.inputs; ++i){
            if(state.truth[i] == SECRET_NUM) state.input[i] = SECRET_NUM;
        }
    }
    if(l.cost_type == SMOOTH){
        smooth_l1_cpu(l.batch*l.inputs, state.input, state.truth, l.delta);
    } else {
        copy_cpu(l.batch*l.inputs, state.truth, 1, l.delta, 1);
        axpy_cpu(l.batch*l.inputs, -1, state.input, 1, l.delta, 1);
    }
    *(l.output) = dot_cpu(l.batch*l.inputs, l.delta, 1, l.delta, 1);
    //printf("cost: %f\n", *l.output);
}
示例#29
0
void backward_local_layer(local_layer l, network_state state)
{
    int i, j;
    int locations = l.out_w*l.out_h;

    gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);

    for(i = 0; i < l.batch; ++i){
        axpy_cpu(l.outputs, 1, l.delta + i*l.outputs, 1, l.bias_updates, 1);
    }

    for(i = 0; i < l.batch; ++i){
        float *input = state.input + i*l.w*l.h*l.c;
        im2col_cpu(input, l.c, l.h, l.w, 
                l.size, l.stride, l.pad, l.col_image);

        for(j = 0; j < locations; ++j){ 
            float *a = l.delta + i*l.outputs + j;
            float *b = l.col_image + j;
            float *c = l.filter_updates + j*l.size*l.size*l.c*l.n;
            int m = l.n;
            int n = l.size*l.size*l.c;
            int k = 1;

            gemm(0,1,m,n,k,1,a,locations,b,locations,1,c,n);
        }

        if(state.delta){
            for(j = 0; j < locations; ++j){ 
                float *a = l.filters + j*l.size*l.size*l.c*l.n;
                float *b = l.delta + i*l.outputs + j;
                float *c = l.col_image + j;

                int m = l.size*l.size*l.c;
                int n = 1;
                int k = l.n;

                gemm(1,0,m,n,k,1,a,m,b,locations,0,c,locations);
            }

            col2im_cpu(l.col_image, l.c,  l.h,  l.w,  l.size,  l.stride, l.pad, state.delta+i*l.c*l.h*l.w);
        }
    }
}
示例#30
0
void backward_shortcut_layer(const layer l, network net)
{
    gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);
    axpy_cpu(l.outputs*l.batch, 1, l.delta, 1, net.delta, 1);
    shortcut_cpu(l.batch, l.out_w, l.out_h, l.out_c, l.delta, l.w, l.h, l.c, net.layers[l.index].delta);
}