/*
 * Forward pass of a locally connected layer on the GPU.
 *
 * Work is issued in three stages:
 *   1. l.biases_gpu is copied into every batch item's slice of l.output_gpu;
 *   2. for each batch item the input is expanded with im2col_ongpu into
 *      l.col_image_gpu, then gemm_ongpu is called once per output location,
 *      each location using its own block of l.weights_gpu;
 *   3. l.activation is applied over the whole output buffer.
 *
 * l      layer description and device buffers (passed by value).
 * state  supplies the input pointer and the stream/handle used by the calls.
 */
void forward_local_layer_gpu(const local_layer l, network_state state)
{
	int out_h = local_out_height(l);
	int out_w = local_out_width(l);
	const int locations = out_h * out_w;

	/* Sizes that stay fixed for the whole call. */
	const int patch_len = l.size*l.size*l.c;         /* elements in one receptive field */
	const int weights_per_location = patch_len*l.n;  /* weight elements owned by one location */
	const int image_len = l.w*l.h*l.c;               /* input elements per batch item */
	int b, loc;

	/* Stage 1: seed each output slice with the biases. */
	for (b = 0; b < l.batch; ++b) {
		copy_ongpu(l.outputs, l.biases_gpu, 1, l.output_gpu + b*l.outputs, 1, state.st_handle.stream);
	}

	/* Stage 2: one small matrix product per output location. */
	for (b = 0; b < l.batch; ++b) {
		float *batch_in = state.input + b*image_len;
		float *batch_out = l.output_gpu + b*l.outputs;

		im2col_ongpu(batch_in, l.c, l.h, l.w,
				l.size, l.stride, l.pad, l.col_image_gpu, state.st_handle.stream);

		for (loc = 0; loc < locations; ++loc) {
			float *weights = l.weights_gpu + loc*weights_per_location;
			float *column = l.col_image_gpu + loc;
			float *dest = batch_out + loc;

			/*
			 * Both scalar arguments are 1 -- presumably alpha and beta, so the
			 * product is added on top of the biases written in stage 1
			 * (confirm against gemm_ongpu's signature). The column and the
			 * destination are both walked with a stride of `locations`.
			 */
			gemm_ongpu(0, 0, l.n, 1, patch_len, 1,
					weights, patch_len,
					column, locations,
					1, dest, locations, state.st_handle);
		}
	}

	/* Stage 3: activation over the full batch. */
	activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation, state.st_handle.stream);
}
Exemplo n.º 2
0
/*
 * Forward pass of a "compact" layer on the GPU.
 *
 * For every batch item the input slice is reduced into the output slice
 * according to l.method:
 *    0  copy the first l.outputs-sized section, then add sections 1..l.index-1
 *    1  copy the first section, then subtract sections 1..l.index-1
 *    2  delegate to compact_forward_max_gpu
 *   10  delegate to compact_forward_padd_gpu
 *   12  delegate to compact_forward_pmax_gpu
 * Any other method value leaves the output slice untouched.
 * Afterwards l.activation is applied over the whole output buffer.
 */
void forward_compact_layer_gpu(const layer l, network_state state)
{
    int b;

    for (b = 0; b < l.batch; ++b)
    {
        float *in = state.input + b*l.inputs;
        float *out = l.output_gpu + b*l.outputs;

        switch (l.method)
        {
        case 0: /* add */
        case 1: /* sub */
        {
            /* The two modes differ only in the sign of the axpy coefficient. */
            const int sign = (l.method == 0) ? 1 : -1;
            int section;

            /* The first section initialises the output ... */
            copy_ongpu(l.outputs, in, 1, out, 1);
            /* ... and the remaining sections are folded in one by one. */
            for (section = 1; section < l.index; ++section)
            {
                axpy_ongpu(l.outputs, sign, in + section*l.outputs, 1, out, 1);
            }
            break;
        }
        case 2: /* max */
            compact_forward_max_gpu(l.w, l.h, l.c, l.index, in, out, l.indexes_gpu);
            break;
        case 10:
            compact_forward_padd_gpu(l.w, l.h, l.c, in, out);
            break;
        case 12:
            compact_forward_pmax_gpu(l.w, l.h, l.c, in, out, l.indexes_gpu);
            break;
        default:
            /* Unrecognised method: nothing is written for this batch item. */
            break;
        }
    }

    activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation);
}
Exemplo n.º 3
0
/*
 * Forward pass of a fully connected layer on the GPU.
 *
 * Sequence: zero-fill the output, run gemm_ongpu on the input and the
 * weights, optionally run batch normalisation, add the biases to every
 * batch row, and finally apply l.activation in place.
 */
void forward_connected_layer_gpu(connected_layer l, network_state state)
{
    const int total = l.outputs*l.batch;
    int row;

    /* Start from a zeroed buffer before the matrix product is written in. */
    fill_ongpu(total, 0, l.output_gpu, 1);

    /*
     * Dimensions passed are (batch, outputs, inputs); the second flag is 1,
     * presumably requesting the transposed weights -- confirm against
     * gemm_ongpu's signature.
     */
    gemm_ongpu(0, 1, l.batch, l.outputs, l.inputs, 1,
               state.input, l.inputs,
               l.weights_gpu, l.inputs,
               1, l.output_gpu, l.outputs);

    if (l.batch_normalize)
    {
        forward_batchnorm_layer_gpu(l, state);
    }

    /* Biases are added after the optional normalisation step, one row at a time. */
    for (row = 0; row < l.batch; ++row)
    {
        axpy_ongpu(l.outputs, 1, l.biases_gpu, 1, l.output_gpu + row*l.outputs, 1);
    }

    activate_array_ongpu(l.output_gpu, total, l.activation);
}
Exemplo n.º 4
0
/*
 * Forward pass of a fully connected layer on the GPU (offset-copy variant).
 *
 * Each batch row of the output is first loaded with the biases, then
 * gemm_ongpu is run on the input and the weights with the output as its
 * destination, and l.activation is applied in place.
 */
void forward_connected_layer_gpu(connected_layer l, network_state state)
{
    const int total = l.outputs*l.batch;
    int row;

    /* Seed row `row` of the output (element offset row*l.outputs) with the biases. */
    for (row = 0; row < l.batch; ++row)
    {
        copy_ongpu_offset(l.outputs, l.biases_gpu, 0, 1, l.output_gpu, row*l.outputs, 1);
    }

    /*
     * Dimensions passed are (batch, outputs, inputs). Both scalar arguments
     * are 1 -- presumably alpha and beta, so the product lands on top of the
     * biases; confirm against gemm_ongpu's signature.
     */
    gemm_ongpu(0, 1, l.batch, l.outputs, l.inputs, 1,
               state.input, l.inputs,
               l.weights_gpu, l.inputs,
               1, l.output_gpu, l.outputs);

    activate_array_ongpu(l.output_gpu, total, l.activation);

    /*
     * Debug aid (disabled): pull the output to the host and print its mean.
     *
     * cuda_pull_array(l.output_gpu, l.output, l.outputs*l.batch);
     * float avg = mean_array(l.output, l.outputs*l.batch);
     * printf("%f\n", avg);
     */
}
Exemplo n.º 5
0
/*
 * Forward pass of a shortcut layer on the GPU.
 *
 * The incoming activations are copied into l.output_gpu, then shortcut_gpu
 * is given the output of layer l.index together with l.output_gpu
 * (presumably merging the earlier layer's output in -- confirm against
 * shortcut_gpu), and finally l.activation is applied in place.
 */
void forward_shortcut_layer_gpu(const layer l, network_state state)
{
    const int count = l.outputs*l.batch;

    /* Begin with a copy of this layer's own input. */
    copy_ongpu(count, state.input, 1, l.output_gpu, 1);

    /* Hand over the referenced layer's output; its dimensions are this layer's w/h/c. */
    shortcut_gpu(l.batch, l.w, l.h, l.c,
                 state.net.layers[l.index].output_gpu,
                 l.out_w, l.out_h, l.out_c,
                 l.output_gpu);

    activate_array_ongpu(l.output_gpu, count, l.activation);
}