void forward_convolutional_layer(convolutional_layer l, network_state state)
{
    int out_h = convolutional_out_height(l);
    int out_w = convolutional_out_width(l);
    int i;

    fill_cpu(l.outputs*l.batch, 0, l.output, 1);
    /*
       if(l.binary){
       binarize_filters(l.filters, l.n, l.c*l.size*l.size, l.binary_filters);
       binarize_filters2(l.filters, l.n, l.c*l.size*l.size, l.cfilters, l.scales);
       swap_binary(&l);
       }
     */

    if(l.binary){
        int m = l.n;
        int k = l.size*l.size*l.c;
        int n = out_h*out_w;

        char  *a = l.cfilters;
        float *b = state.workspace;
        float *c = l.output;

        for(i = 0; i < l.batch; ++i){
            im2col_cpu(state.input, l.c, l.h, l.w, 
                    l.size, l.stride, l.pad, b);
            gemm_bin(m,n,k,1,a,k,b,n,c,n);
            c += n*m;
            state.input += l.c*l.h*l.w;
        }
        scale_bias(l.output, l.scales, l.batch, l.n, out_h*out_w);
        add_bias(l.output, l.biases, l.batch, l.n, out_h*out_w);
        activate_array(l.output, m*n*l.batch, l.activation);
        return;
    }

    int m = l.n;
    int k = l.size*l.size*l.c;
    int n = out_h*out_w;

    float *a = l.filters;
    float *b = state.workspace;
    float *c = l.output;

    for(i = 0; i < l.batch; ++i){
        im2col_cpu(state.input, l.c, l.h, l.w, 
                l.size, l.stride, l.pad, b);
        gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
        c += n*m;
        state.input += l.c*l.h*l.w;
    }

    if(l.batch_normalize){
        forward_batchnorm_layer(l, state);
    }
    add_bias(l.output, l.biases, l.batch, l.n, out_h*out_w);

    activate_array(l.output, m*n*l.batch, l.activation);
}
void forward_convolutional_layer(const convolutional_layer l, network_state state)
{
    int out_h = convolutional_out_height(l);
    int out_w = convolutional_out_width(l);
    int i;

    bias_output(l.output, l.biases, l.batch, l.n, out_h*out_w);

    int m = l.n;
    int k = l.size*l.size*l.c;
    int n = out_h*out_w;

    float *a = l.filters;
    float *b = l.col_image;
    float *c = l.output;

    for(i = 0; i < l.batch; ++i){
        im2col_cpu(state.input, l.c, l.h, l.w, 
            l.size, l.stride, l.pad, b);
        gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
        c += n*m;
        state.input += l.c*l.h*l.w;
    }
    activate_array(l.output, m*n*l.batch, l.activation);
}
Exemple #3
0
void forward_connected_layer(connected_layer l, network_state state)
{
    int i;
    fill_cpu(l.outputs*l.batch, 0, l.output, 1);
    int m = l.batch;
    int k = l.inputs;
    int n = l.outputs;
    float *a = state.input;
    float *b = l.weights;
    float *c = l.output;
    gemm(0,1,m,n,k,1,a,k,b,k,1,c,n);
    if(l.batch_normalize){
        if(state.train){
            mean_cpu(l.output, l.batch, l.outputs, 1, l.mean);
            variance_cpu(l.output, l.mean, l.batch, l.outputs, 1, l.variance);

            scal_cpu(l.outputs, .95, l.rolling_mean, 1);
            axpy_cpu(l.outputs, .05, l.mean, 1, l.rolling_mean, 1);
            scal_cpu(l.outputs, .95, l.rolling_variance, 1);
            axpy_cpu(l.outputs, .05, l.variance, 1, l.rolling_variance, 1);

            copy_cpu(l.outputs*l.batch, l.output, 1, l.x, 1);
            normalize_cpu(l.output, l.mean, l.variance, l.batch, l.outputs, 1);   
            copy_cpu(l.outputs*l.batch, l.output, 1, l.x_norm, 1);
        } else {
            normalize_cpu(l.output, l.rolling_mean, l.rolling_variance, l.batch, l.outputs, 1);
        }
        scale_bias(l.output, l.scales, l.batch, l.outputs, 1);
    }
    for(i = 0; i < l.batch; ++i){
        axpy_cpu(l.outputs, 1, l.biases, 1, l.output + i*l.outputs, 1);
    }
    activate_array(l.output, l.outputs*l.batch, l.activation);
}
void forward_local_layer(const local_layer l, network_state state) {
	int out_h = local_out_height(l);
	int out_w = local_out_width(l);
	int i, j;
	int locations = out_h * out_w;

	for (i = 0; i < l.batch; ++i) {
		copy_cpu(l.outputs, l.biases, 1, l.output + i * l.outputs, 1);
	}

	for (i = 0; i < l.batch; ++i) {
		float *input = state.input + i * l.w * l.h * l.c;
		im2col_cpu(input, l.c, l.h, l.w, l.size, l.stride, l.pad, l.col_image);
		float *output = l.output + i * l.outputs;
		for (j = 0; j < locations; ++j) {
			float *a = l.weights + j * l.size * l.size * l.c * l.n;
			float *b = l.col_image + j;
			float *c = output + j;

			int m = l.n;
			int n = 1;
			int k = l.size * l.size * l.c;

			gemm(0, 0, m, n, k, 1, a, k, b, locations, 1, c, locations);
		}
	}
	activate_array(l.output, l.outputs * l.batch, l.activation);
}
void forward_deconvolutional_layer(const layer l, network net) {
	int i;

	int m = l.size * l.size * l.n;
	int n = l.h * l.w;
	int k = l.c;

	fill_cpu(l.outputs * l.batch, 0, l.output, 1);

	for (i = 0; i < l.batch; ++i) {
		real_t *a = l.weights;
		real_t *b = net.input + i * l.c * l.h * l.w;
		real_t *c = net.workspace;

		gemm_cpu(1, 0, m, n, k, 1, a, m, b, n, 0, c, n);

		col2im_cpu(net.workspace, l.out_c, l.out_h, l.out_w, l.size, l.stride,
				l.pad, l.output + i * l.outputs);
	}
	if (l.batch_normalize) {
		forward_batchnorm_layer(l, net);
	} else {
		add_bias(l.output, l.biases, l.batch, l.n, l.out_w * l.out_h);
	}
	activate_array(l.output, l.batch * l.n * l.out_w * l.out_h, l.activation);
}
void forward_deconvolutional_layer(const layer l, network_state state)
{
    int i;
    int out_h = l.out_h;
    int out_w = l.out_w;
    int size = out_h*out_w;

    int m = l.size*l.size*l.n;
    int n = l.h*l.w;
    int k = l.c;

    fill_cpu(l.outputs*l.batch, 0, l.output, 1);

    for(i = 0; i < l.batch; ++i){
        float *a = l.weights;
        float *b = state.input + i*l.c*l.h*l.w;
        float *c = state.workspace;

        gemm(1,0,m,n,k,1,a,m,b,n,0,c,n);

        col2im_cpu(c, l.n, out_h, out_w, l.size, l.stride, 0, l.output+i*l.n*size);
    }

    if(l.batch_normalize){
        forward_batchnorm_layer(l, state);
    } else {
        add_bias(l.output, l.biases, l.batch, l.n, l.out_h*l.out_w);
    }
    activate_array(l.output, l.batch*l.n*size, l.activation);
}
Exemple #7
0
void forward_connected_layer(connected_layer l, network_state state)
{
    int i;
    for(i = 0; i < l.batch; ++i){
        copy_cpu(l.outputs, l.biases, 1, l.output + i*l.outputs, 1);
    }
    int m = l.batch;
    int k = l.inputs;
    int n = l.outputs;
    float *a = state.input;
    float *b = l.weights;
    float *c = l.output;
    gemm(0,1,m,n,k,1,a,k,b,k,1,c,n);
    activate_array(l.output, l.outputs*l.batch, l.activation);
}
Exemple #8
0
void forward_compact_layer(const layer l, network_state state)
{
    int i, b;
    for (b=0;b<l.batch;b++)
    {
        // copy first section
        copy_cpu(l.outputs, state.input+b*l.inputs, 1, l.output+b*l.outputs, 1);
        // add other splits
        for (i=1;i<l.index;i++)
        {
            axpy_cpu(l.outputs, 1, state.input+b*l.inputs+i*l.outputs, 1, l.output+b*l.outputs, 1);
        }
    }
    activate_array(l.output, l.outputs*l.batch, l.activation);
}
void forward_convolutional_layer(convolutional_layer l, network net)
{
    int i, j;

    fill_cpu(l.outputs*l.batch, 0, l.output, 1);

    if(l.xnor){
        binarize_weights(l.weights, l.n, l.c/l.groups*l.size*l.size, l.binary_weights);
        swap_binary(&l);
        binarize_cpu(net.input, l.c*l.h*l.w*l.batch, l.binary_input);
        net.input = l.binary_input;
    }

    int m = l.n/l.groups;
    int k = l.size*l.size*l.c/l.groups;
    int n = l.out_w*l.out_h;
    for(i = 0; i < l.batch; ++i){
        for(j = 0; j < l.groups; ++j){
            float *a = l.weights + j*l.nweights/l.groups;
            float *b = net.workspace;
            float *c = l.output + (i*l.groups + j)*n*m;

            im2col_cpu(net.input + (i*l.groups + j)*l.c/l.groups*l.h*l.w,
                l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, b);
            gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
        }
    }

    if(l.batch_normalize){
        forward_batchnorm_layer(l, net);
    } else {
        add_bias(l.output, l.biases, l.batch, l.n, l.out_h*l.out_w);
    }

    activate_array(l.output, l.outputs*l.batch, l.activation);
    if(l.binary || l.xnor) swap_binary(&l);
}
Exemple #10
0
void forward_shortcut_layer(const layer l, network net)
{
    copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1);
    shortcut_cpu(l.batch, l.w, l.h, l.c, net.layers[l.index].output, l.out_w, l.out_h, l.out_c, l.output);
    activate_array(l.output, l.outputs*l.batch, l.activation);
}
void backward_lstm_layer(layer l, network state) {
	network s = { 0 };
	s.train = state.train;
	int i;
	layer wf = *(l.wf);
	layer wi = *(l.wi);
	layer wg = *(l.wg);
	layer wo = *(l.wo);

	layer uf = *(l.uf);
	layer ui = *(l.ui);
	layer ug = *(l.ug);
	layer uo = *(l.uo);

	increment_layer(&wf, l.steps - 1);
	increment_layer(&wi, l.steps - 1);
	increment_layer(&wg, l.steps - 1);
	increment_layer(&wo, l.steps - 1);

	increment_layer(&uf, l.steps - 1);
	increment_layer(&ui, l.steps - 1);
	increment_layer(&ug, l.steps - 1);
	increment_layer(&uo, l.steps - 1);

	state.input += l.inputs * l.batch * (l.steps - 1);
	if (state.delta)
		state.delta += l.inputs * l.batch * (l.steps - 1);

	l.output += l.outputs * l.batch * (l.steps - 1);
	l.cell_cpu += l.outputs * l.batch * (l.steps - 1);
	l.delta += l.outputs * l.batch * (l.steps - 1);

	for (i = l.steps - 1; i >= 0; --i) {
		if (i != 0)
			copy_cpu(l.outputs * l.batch, l.cell_cpu - l.outputs * l.batch, 1,
					l.prev_cell_cpu, 1);
		copy_cpu(l.outputs * l.batch, l.cell_cpu, 1, l.c_cpu, 1);
		if (i != 0)
			copy_cpu(l.outputs * l.batch, l.output - l.outputs * l.batch, 1,
					l.prev_state_cpu, 1);
		copy_cpu(l.outputs * l.batch, l.output, 1, l.h_cpu, 1);

		l.dh_cpu = (i == 0) ? 0 : l.delta - l.outputs * l.batch;

		copy_cpu(l.outputs * l.batch, wf.output, 1, l.f_cpu, 1);
		axpy_cpu(l.outputs * l.batch, 1, uf.output, 1, l.f_cpu, 1);

		copy_cpu(l.outputs * l.batch, wi.output, 1, l.i_cpu, 1);
		axpy_cpu(l.outputs * l.batch, 1, ui.output, 1, l.i_cpu, 1);

		copy_cpu(l.outputs * l.batch, wg.output, 1, l.g_cpu, 1);
		axpy_cpu(l.outputs * l.batch, 1, ug.output, 1, l.g_cpu, 1);

		copy_cpu(l.outputs * l.batch, wo.output, 1, l.o_cpu, 1);
		axpy_cpu(l.outputs * l.batch, 1, uo.output, 1, l.o_cpu, 1);

		activate_array(l.f_cpu, l.outputs * l.batch, LOGISTIC);
		activate_array(l.i_cpu, l.outputs * l.batch, LOGISTIC);
		activate_array(l.g_cpu, l.outputs * l.batch, TANH);
		activate_array(l.o_cpu, l.outputs * l.batch, LOGISTIC);

		copy_cpu(l.outputs * l.batch, l.delta, 1, l.temp3_cpu, 1);

		copy_cpu(l.outputs * l.batch, l.c_cpu, 1, l.temp_cpu, 1);
		activate_array(l.temp_cpu, l.outputs * l.batch, TANH);

		copy_cpu(l.outputs * l.batch, l.temp3_cpu, 1, l.temp2_cpu, 1);
		mul_cpu(l.outputs * l.batch, l.o_cpu, 1, l.temp2_cpu, 1);

		gradient_array(l.temp_cpu, l.outputs * l.batch, TANH, l.temp2_cpu);
		axpy_cpu(l.outputs * l.batch, 1, l.dc_cpu, 1, l.temp2_cpu, 1);

		copy_cpu(l.outputs * l.batch, l.c_cpu, 1, l.temp_cpu, 1);
		activate_array(l.temp_cpu, l.outputs * l.batch, TANH);
		mul_cpu(l.outputs * l.batch, l.temp3_cpu, 1, l.temp_cpu, 1);
		gradient_array(l.o_cpu, l.outputs * l.batch, LOGISTIC, l.temp_cpu);
		copy_cpu(l.outputs * l.batch, l.temp_cpu, 1, wo.delta, 1);
		s.input = l.prev_state_cpu;
		s.delta = l.dh_cpu;
		backward_connected_layer(wo, s);

		copy_cpu(l.outputs * l.batch, l.temp_cpu, 1, uo.delta, 1);
		s.input = state.input;
		s.delta = state.delta;
		backward_connected_layer(uo, s);

		copy_cpu(l.outputs * l.batch, l.temp2_cpu, 1, l.temp_cpu, 1);
		mul_cpu(l.outputs * l.batch, l.i_cpu, 1, l.temp_cpu, 1);
		gradient_array(l.g_cpu, l.outputs * l.batch, TANH, l.temp_cpu);
		copy_cpu(l.outputs * l.batch, l.temp_cpu, 1, wg.delta, 1);
		s.input = l.prev_state_cpu;
		s.delta = l.dh_cpu;
		backward_connected_layer(wg, s);

		copy_cpu(l.outputs * l.batch, l.temp_cpu, 1, ug.delta, 1);
		s.input = state.input;
		s.delta = state.delta;
		backward_connected_layer(ug, s);

		copy_cpu(l.outputs * l.batch, l.temp2_cpu, 1, l.temp_cpu, 1);
		mul_cpu(l.outputs * l.batch, l.g_cpu, 1, l.temp_cpu, 1);
		gradient_array(l.i_cpu, l.outputs * l.batch, LOGISTIC, l.temp_cpu);
		copy_cpu(l.outputs * l.batch, l.temp_cpu, 1, wi.delta, 1);
		s.input = l.prev_state_cpu;
		s.delta = l.dh_cpu;
		backward_connected_layer(wi, s);

		copy_cpu(l.outputs * l.batch, l.temp_cpu, 1, ui.delta, 1);
		s.input = state.input;
		s.delta = state.delta;
		backward_connected_layer(ui, s);

		copy_cpu(l.outputs * l.batch, l.temp2_cpu, 1, l.temp_cpu, 1);
		mul_cpu(l.outputs * l.batch, l.prev_cell_cpu, 1, l.temp_cpu, 1);
		gradient_array(l.f_cpu, l.outputs * l.batch, LOGISTIC, l.temp_cpu);
		copy_cpu(l.outputs * l.batch, l.temp_cpu, 1, wf.delta, 1);
		s.input = l.prev_state_cpu;
		s.delta = l.dh_cpu;
		backward_connected_layer(wf, s);

		copy_cpu(l.outputs * l.batch, l.temp_cpu, 1, uf.delta, 1);
		s.input = state.input;
		s.delta = state.delta;
		backward_connected_layer(uf, s);

		copy_cpu(l.outputs * l.batch, l.temp2_cpu, 1, l.temp_cpu, 1);
		mul_cpu(l.outputs * l.batch, l.f_cpu, 1, l.temp_cpu, 1);
		copy_cpu(l.outputs * l.batch, l.temp_cpu, 1, l.dc_cpu, 1);

		state.input -= l.inputs * l.batch;
		if (state.delta)
			state.delta -= l.inputs * l.batch;
		l.output -= l.outputs * l.batch;
		l.cell_cpu -= l.outputs * l.batch;
		l.delta -= l.outputs * l.batch;

		increment_layer(&wf, -1);
		increment_layer(&wi, -1);
		increment_layer(&wg, -1);
		increment_layer(&wo, -1);

		increment_layer(&uf, -1);
		increment_layer(&ui, -1);
		increment_layer(&ug, -1);
		increment_layer(&uo, -1);
	}
}
void forward_lstm_layer(layer l, network state) {
	network s = { 0 };
	s.train = state.train;
	int i;
	layer wf = *(l.wf);
	layer wi = *(l.wi);
	layer wg = *(l.wg);
	layer wo = *(l.wo);

	layer uf = *(l.uf);
	layer ui = *(l.ui);
	layer ug = *(l.ug);
	layer uo = *(l.uo);

	fill_cpu(l.outputs * l.batch * l.steps, 0, wf.delta, 1);
	fill_cpu(l.outputs * l.batch * l.steps, 0, wi.delta, 1);
	fill_cpu(l.outputs * l.batch * l.steps, 0, wg.delta, 1);
	fill_cpu(l.outputs * l.batch * l.steps, 0, wo.delta, 1);

	fill_cpu(l.outputs * l.batch * l.steps, 0, uf.delta, 1);
	fill_cpu(l.outputs * l.batch * l.steps, 0, ui.delta, 1);
	fill_cpu(l.outputs * l.batch * l.steps, 0, ug.delta, 1);
	fill_cpu(l.outputs * l.batch * l.steps, 0, uo.delta, 1);
	if (state.train) {
		fill_cpu(l.outputs * l.batch * l.steps, 0, l.delta, 1);
	}

	for (i = 0; i < l.steps; ++i) {
		s.input = l.h_cpu;
		forward_connected_layer(wf, s);
		forward_connected_layer(wi, s);
		forward_connected_layer(wg, s);
		forward_connected_layer(wo, s);

		s.input = state.input;
		forward_connected_layer(uf, s);
		forward_connected_layer(ui, s);
		forward_connected_layer(ug, s);
		forward_connected_layer(uo, s);

		copy_cpu(l.outputs * l.batch, wf.output, 1, l.f_cpu, 1);
		axpy_cpu(l.outputs * l.batch, 1, uf.output, 1, l.f_cpu, 1);

		copy_cpu(l.outputs * l.batch, wi.output, 1, l.i_cpu, 1);
		axpy_cpu(l.outputs * l.batch, 1, ui.output, 1, l.i_cpu, 1);

		copy_cpu(l.outputs * l.batch, wg.output, 1, l.g_cpu, 1);
		axpy_cpu(l.outputs * l.batch, 1, ug.output, 1, l.g_cpu, 1);

		copy_cpu(l.outputs * l.batch, wo.output, 1, l.o_cpu, 1);
		axpy_cpu(l.outputs * l.batch, 1, uo.output, 1, l.o_cpu, 1);

		activate_array(l.f_cpu, l.outputs * l.batch, LOGISTIC);
		activate_array(l.i_cpu, l.outputs * l.batch, LOGISTIC);
		activate_array(l.g_cpu, l.outputs * l.batch, TANH);
		activate_array(l.o_cpu, l.outputs * l.batch, LOGISTIC);

		copy_cpu(l.outputs * l.batch, l.i_cpu, 1, l.temp_cpu, 1);
		mul_cpu(l.outputs * l.batch, l.g_cpu, 1, l.temp_cpu, 1);
		mul_cpu(l.outputs * l.batch, l.f_cpu, 1, l.c_cpu, 1);
		axpy_cpu(l.outputs * l.batch, 1, l.temp_cpu, 1, l.c_cpu, 1);

		copy_cpu(l.outputs * l.batch, l.c_cpu, 1, l.h_cpu, 1);
		activate_array(l.h_cpu, l.outputs * l.batch, TANH);
		mul_cpu(l.outputs * l.batch, l.o_cpu, 1, l.h_cpu, 1);

		copy_cpu(l.outputs * l.batch, l.c_cpu, 1, l.cell_cpu, 1);
		copy_cpu(l.outputs * l.batch, l.h_cpu, 1, l.output, 1);

		state.input += l.inputs * l.batch;
		l.output += l.outputs * l.batch;
		l.cell_cpu += l.outputs * l.batch;

		increment_layer(&wf, 1);
		increment_layer(&wi, 1);
		increment_layer(&wg, 1);
		increment_layer(&wo, 1);

		increment_layer(&uf, 1);
		increment_layer(&ui, 1);
		increment_layer(&ug, 1);
		increment_layer(&uo, 1);
	}
}
Exemple #13
0
void forward_detection_layer(const detection_layer l, network_state state)
{
    int in_i = 0;
    int out_i = 0;
    int locations = get_detection_layer_locations(l);
    int i,j;
    for(i = 0; i < l.batch*locations; ++i){
        int mask = (!state.truth || state.truth[out_i + (l.background || l.objectness) + l.classes + 2]);
        float scale = 1;
        if(l.joint) scale = state.input[in_i++];
        else if(l.objectness){
            l.output[out_i++] = 1-state.input[in_i++];
            scale = mask;
        }
        else if(l.background) l.output[out_i++] = scale*state.input[in_i++];

        for(j = 0; j < l.classes; ++j){
            l.output[out_i++] = scale*state.input[in_i++];
        }
        if(l.objectness){

        }else if(l.background){
            softmax_array(l.output + out_i - l.classes-l.background, l.classes+l.background, l.output + out_i - l.classes-l.background);
            activate_array(state.input+in_i, l.coords, LOGISTIC);
        }
        for(j = 0; j < l.coords; ++j){
            l.output[out_i++] = mask*state.input[in_i++];
        }
    }
    float avg_iou = 0;
    int count = 0;
    if(l.does_cost && state.train){
        *(l.cost) = 0;
        int size = get_detection_layer_output_size(l) * l.batch;
        memset(l.delta, 0, size * sizeof(float));
        for (i = 0; i < l.batch*locations; ++i) {
            int classes = l.objectness+l.classes;
            int offset = i*(classes+l.coords);
            for (j = offset; j < offset+classes; ++j) {
                *(l.cost) += pow(state.truth[j] - l.output[j], 2);
                l.delta[j] =  state.truth[j] - l.output[j];
            }

            box truth;
            truth.x = state.truth[j+0]/7;
            truth.y = state.truth[j+1]/7;
            truth.w = pow(state.truth[j+2], 2);
            truth.h = pow(state.truth[j+3], 2);
            box out;
            out.x = l.output[j+0]/7;
            out.y = l.output[j+1]/7;
            out.w = pow(l.output[j+2], 2);
            out.h = pow(l.output[j+3], 2);

            if(!(truth.w*truth.h)) continue;
            float iou = box_iou(out, truth);
            avg_iou += iou;
            ++count;
            dbox delta = diou(out, truth);

            l.delta[j+0] = 10 * delta.dx/7;
            l.delta[j+1] = 10 * delta.dy/7;
            l.delta[j+2] = 10 * delta.dw * 2 * sqrt(out.w);
            l.delta[j+3] = 10 * delta.dh * 2 * sqrt(out.h);


            *(l.cost) += pow((1-iou), 2);
            l.delta[j+0] = 4 * (state.truth[j+0] - l.output[j+0]);
            l.delta[j+1] = 4 * (state.truth[j+1] - l.output[j+1]);
            l.delta[j+2] = 4 * (state.truth[j+2] - l.output[j+2]);
            l.delta[j+3] = 4 * (state.truth[j+3] - l.output[j+3]);
            if(l.rescore){
                for (j = offset; j < offset+classes; ++j) {
                    if(state.truth[j]) state.truth[j] = iou;
                    l.delta[j] =  state.truth[j] - l.output[j];
                }
            }
        }
        printf("Avg IOU: %f\n", avg_iou/count);
    }
}
void forward_activation_layer(layer l, network net) {
	copy_cpu(l.outputs * l.batch, net.input, 1, l.output, 1);
	activate_array(l.output, l.outputs * l.batch, l.activation);
}
void forward_convolutional_layer(const convolutional_layer l, network_state state)
{
    int out_h = convolutional_out_height(l);
    int out_w = convolutional_out_width(l);
    int i;

    fill_cpu(l.outputs*l.batch, 0, l.output, 1);

    int m = l.n;
    int k = l.size*l.size*l.c;
    int n = out_h*out_w;

    float *a = l.filters;
    float *b = l.col_image;
    float *c = l.output;
//	printf("the l.size is %i \n", l.size);
///*

//printf("the m,k,n is %i,%i,%i \n", m,k,n);
    for(i = 0; i < l.batch; ++i){
        im2col_cpu(state.input, l.c, l.h, l.w, 
                l.size, l.stride, l.pad, b);
        gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
        c += n*m;
        state.input += l.c*l.h*l.w;
    }
//*/

//add by fanghao
 /*   int ii,jj,kk,mm,pp,tt;
    int lcc = l.c;
    int lhh = l.h;
    int lww = l.w;
    int kernel = l.size;
    int pad;
    if(l.pad)
         pad = l.size/2;
    else
	 pad = l.pad;
    lhh += 2*pad;
    lww += 2*pad;
    float *dataP;
    dataP = (float *)calloc(lcc*lhh*lww, sizeof(float));


//printf("the l.h is %i \n", l.h);
//printf("the l.w is %i \n", l.w);
//printf("the lhh is %i \n", lhh);
//printf("the lww is %i \n", lww);
//printf("the pad is %i \n", pad);




    for(ii=0; ii < lcc; ii++)
        for(jj=pad; jj<lhh-pad; jj++)
             for(kk=pad; kk<lww-pad; kk++)
                dataP[ii*lhh*lww + jj*lww + kk] = state.input[ii*(lhh - 2*pad)*(lww-2*pad) + (jj - pad)*(lww - 2*pad) + kk-pad];

    for(ii=0; ii<m; ii++)
        for(jj=0; jj<out_h; jj++)
            for(kk=0; kk<out_w; kk++) {
                float tempAcc = 0.0;
                    for(mm=0; mm<lcc; mm++)
                        for(pp=0; pp<kernel; pp++)
                            for(tt=0; tt<kernel; tt++)
                                tempAcc += a[ii*lcc*kernel*kernel+mm*kernel*kernel+pp*kernel+tt]*dataP[mm*lhh*lww+(l.stride*jj+pp)*lww+l.stride*kk+tt];
                c[ii*out_h*out_w+jj*out_w+kk] = tempAcc;
                        }
//	c += n*m;
//state.input += l.c*l.h*l.w;
//
*/



    if(l.batch_normalize){
        if(state.train){
            mean_cpu(l.output, l.batch, l.n, l.out_h*l.out_w, l.mean);   
            variance_cpu(l.output, l.mean, l.batch, l.n, l.out_h*l.out_w, l.variance);   
            normalize_cpu(l.output, l.mean, l.variance, l.batch, l.n, l.out_h*l.out_w);   
        } else {
            normalize_cpu(l.output, l.rolling_mean, l.rolling_variance, l.batch, l.n, l.out_h*l.out_w);
        }
        scale_bias(l.output, l.scales, l.batch, l.n, out_h*out_w);
    }
    add_bias(l.output, l.biases, l.batch, l.n, out_h*out_w);

    activate_array(l.output, m*n*l.batch, l.activation);
}
Exemple #16
0
void forward_detection_layer(const detection_layer l, network_state state)
{
    int locations = l.side*l.side;
    int i,j;
    memcpy(l.output, state.input, l.outputs*l.batch*sizeof(float));
    int b;
    if (l.softmax){
        for(b = 0; b < l.batch; ++b){
            int index = b*l.inputs;
            for (i = 0; i < locations; ++i) {
                int offset = i*l.classes;
                softmax_array(l.output + index + offset, l.classes, 1,
                        l.output + index + offset);
            }
            int offset = locations*l.classes;
            activate_array(l.output + index + offset, locations*l.n*(1+l.coords), LOGISTIC);
        }
    }
    if(state.train){
        float avg_iou = 0;
        float avg_cat = 0;
        float avg_allcat = 0;
        float avg_obj = 0;
        float avg_anyobj = 0;
        int count = 0;
        *(l.cost) = 0;
        int size = l.inputs * l.batch;
        memset(l.delta, 0, size * sizeof(float));
        for (b = 0; b < l.batch; ++b){
            int index = b*l.inputs;
            for (i = 0; i < locations; ++i) {
                int truth_index = (b*locations + i)*(1+l.coords+l.classes);
                int is_obj = state.truth[truth_index];
                for (j = 0; j < l.n; ++j) {
                    int p_index = index + locations*l.classes + i*l.n + j;
                    l.delta[p_index] = l.noobject_scale*(0 - l.output[p_index]);
                    *(l.cost) += l.noobject_scale*pow(l.output[p_index], 2);
                    avg_anyobj += l.output[p_index];
                }

                int best_index = -1;
                float best_iou = 0;
                float best_rmse = 20;

                if (!is_obj){
                    continue;
                }

                int class_index = index + i*l.classes;
                for(j = 0; j < l.classes; ++j) {
                    l.delta[class_index+j] = l.class_scale * (state.truth[truth_index+1+j] - l.output[class_index+j]);
                    *(l.cost) += l.class_scale * pow(state.truth[truth_index+1+j] - l.output[class_index+j], 2);
                    if(state.truth[truth_index + 1 + j]) avg_cat += l.output[class_index+j];
                    avg_allcat += l.output[class_index+j];
                }

                box truth = float_to_box(state.truth + truth_index + 1 + l.classes);
                truth.x /= l.side;
                truth.y /= l.side;

                for(j = 0; j < l.n; ++j){
                    int box_index = index + locations*(l.classes + l.n) + (i*l.n + j) * l.coords;
                    box out = float_to_box(l.output + box_index);
                    out.x /= l.side;
                    out.y /= l.side;

                    if (l.sqrt){
                        out.w = out.w*out.w;
                        out.h = out.h*out.h;
                    }

                    float iou  = box_iou(out, truth);
                    //iou = 0;
                    float rmse = box_rmse(out, truth);
                    if(best_iou > 0 || iou > 0){
                        if(iou > best_iou){
                            best_iou = iou;
                            best_index = j;
                        }
                    }else{
                        if(rmse < best_rmse){
                            best_rmse = rmse;
                            best_index = j;
                        }
                    }
                }

                if(l.forced){
                    if(truth.w*truth.h < .1){
                        best_index = 1;
                    }else{
                        best_index = 0;
                    }
                }

                int box_index = index + locations*(l.classes + l.n) + (i*l.n + best_index) * l.coords;
                int tbox_index = truth_index + 1 + l.classes;

                box out = float_to_box(l.output + box_index);
                out.x /= l.side;
                out.y /= l.side;
                if (l.sqrt) {
                    out.w = out.w*out.w;
                    out.h = out.h*out.h;
                }
                float iou  = box_iou(out, truth);

                //printf("%d,", best_index);
                int p_index = index + locations*l.classes + i*l.n + best_index;
                *(l.cost) -= l.noobject_scale * pow(l.output[p_index], 2);
                *(l.cost) += l.object_scale * pow(1-l.output[p_index], 2);
                avg_obj += l.output[p_index];
                l.delta[p_index] = l.object_scale * (1.-l.output[p_index]);

                if(l.rescore){
                    l.delta[p_index] = l.object_scale * (iou - l.output[p_index]);
                }

                l.delta[box_index+0] = l.coord_scale*(state.truth[tbox_index + 0] - l.output[box_index + 0]);
                l.delta[box_index+1] = l.coord_scale*(state.truth[tbox_index + 1] - l.output[box_index + 1]);
                l.delta[box_index+2] = l.coord_scale*(state.truth[tbox_index + 2] - l.output[box_index + 2]);
                l.delta[box_index+3] = l.coord_scale*(state.truth[tbox_index + 3] - l.output[box_index + 3]);
                if(l.sqrt){
                    l.delta[box_index+2] = l.coord_scale*(sqrt(state.truth[tbox_index + 2]) - l.output[box_index + 2]);
                    l.delta[box_index+3] = l.coord_scale*(sqrt(state.truth[tbox_index + 3]) - l.output[box_index + 3]);
                }

                *(l.cost) += pow(1-iou, 2);
                avg_iou += iou;
                ++count;
            }
            if(l.softmax){
                gradient_array(l.output + index + locations*l.classes, locations*l.n*(1+l.coords), 
                        LOGISTIC, l.delta + index + locations*l.classes);
            }
        }
        if ( l.b_debug )
        {
            printf("Detection Avg IOU: %f, Pos Cat: %f, All Cat: %f, Pos Obj: %f, Any Obj: %f, count: %d\n", avg_iou/count, avg_cat/count, avg_allcat/(count*l.classes), avg_obj/count, avg_anyobj/(l.batch*locations*l.n), count);
        }
    }
}
void forward_iseg_layer(const layer l, network net) {

	double time = what_time_is_it_now();
	int i, b, j, k;
	int ids = l.extra;
	memcpy(l.output, net.input, l.outputs * l.batch * sizeof(real_t));
	memset(l.delta, 0, l.outputs * l.batch * sizeof(real_t));

#ifndef GPU
	for (b = 0; b < l.batch; ++b) {
		int index = b * l.outputs;
		activate_array(l.output + index, l.classes * l.w * l.h, LOGISTIC);
	}
#endif

	for (b = 0; b < l.batch; ++b) {
		// a priori, each pixel has no class
		for (i = 0; i < l.classes; ++i) {
			for (k = 0; k < l.w * l.h; ++k) {
				int index = b * l.outputs + i * l.w * l.h + k;
				l.delta[index] = 0 - l.output[index];
			}
		}

		// a priori, embedding should be small magnitude
		for (i = 0; i < ids; ++i) {
			for (k = 0; k < l.w * l.h; ++k) {
				int index = b * l.outputs + (i + l.classes) * l.w * l.h + k;
				l.delta[index] = .1 * (0 - l.output[index]);
			}
		}

		memset(l.counts, 0, 90 * sizeof(int));
		for (i = 0; i < 90; ++i) {
			fill_cpu(ids, 0, l.sums[i], 1);

			int c = net.truth[b * l.truths + i * (l.w * l.h + 1)];
			if (c < 0)
				break;
			// add up metric embeddings for each instance
			for (k = 0; k < l.w * l.h; ++k) {
				int index = b * l.outputs + c * l.w * l.h + k;
				real_t v = net.truth[b * l.truths + i * (l.w * l.h + 1) + 1 + k];
				if (v) {
					l.delta[index] = v - l.output[index];
					axpy_cpu(ids, 1,
							l.output + b * l.outputs + l.classes * l.w * l.h
									+ k, l.w * l.h, l.sums[i], 1);
					++l.counts[i];
				}
			}
		}

		real_t *mse = calloc(90, sizeof(real_t));
		for (i = 0; i < 90; ++i) {
			int c = net.truth[b * l.truths + i * (l.w * l.h + 1)];
			if (c < 0)
				break;
			for (k = 0; k < l.w * l.h; ++k) {
				real_t v = net.truth[b * l.truths + i * (l.w * l.h + 1) + 1 + k];
				if (v) {
					int z;
					real_t sum = 0;
					for (z = 0; z < ids; ++z) {
						int index = b * l.outputs + (l.classes + z) * l.w * l.h
								+ k;
						sum += pow(l.sums[i][z] / l.counts[i] - l.output[index],
								2);
					}
					mse[i] += sum;
				}
			}
			mse[i] /= l.counts[i];
		}

		// Calculate average embedding
		for (i = 0; i < 90; ++i) {
			if (!l.counts[i])
				continue;
			scal_cpu(ids, 1.f / l.counts[i], l.sums[i], 1);
			if (b == 0 && net.gpu_index == 0) {
				printf("%4d, %6.3f, ", l.counts[i], mse[i]);
				for (j = 0; j < ids; ++j) {
					printf("%6.3f,", l.sums[i][j]);
				}
				printf("\n");
			}
		}
		free(mse);

		// Calculate embedding loss
		for (i = 0; i < 90; ++i) {
			if (!l.counts[i])
				continue;
			for (k = 0; k < l.w * l.h; ++k) {
				real_t v = net.truth[b * l.truths + i * (l.w * l.h + 1) + 1 + k];
				if (v) {
					for (j = 0; j < 90; ++j) {
						if (!l.counts[j])
							continue;
						int z;
						for (z = 0; z < ids; ++z) {
							int index = b * l.outputs
									+ (l.classes + z) * l.w * l.h + k;
							real_t diff = l.sums[j][z] - l.output[index];
							if (j == i)
								l.delta[index] += diff < 0 ? -.1 : .1;
							else
								l.delta[index] += -(diff < 0 ? -.1 : .1);
						}
					}
				}
			}
		}

		for (i = 0; i < ids; ++i) {
			for (k = 0; k < l.w * l.h; ++k) {
				int index = b * l.outputs + (i + l.classes) * l.w * l.h + k;
				l.delta[index] *= .01;
			}
		}
	}

	*(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2);
	printf("took %lf sec\n", what_time_is_it_now() - time);
}