Пример #1
0
/*
 * Backward pass of a connected layer, including the optional
 * batch-normalisation branch.
 *
 * l     - layer whose delta / update buffers are read and written.
 * state - supplies state.input and, optionally, state.delta; the second
 *         gemm() call is skipped when state.delta is NULL.
 *
 * NOTE(review): gemm() is presumed to be BLAS-like
 * (C = alpha*op(A)*op(B) + beta*C) - confirm against its definition.
 */
void backward_connected_layer(connected_layer l, network_state state)
{
    int sample;

    gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);

    /* One axpy_cpu() call per batch row, each targeting l.bias_updates. */
    for(sample = 0; sample < l.batch; ++sample){
        float *row = l.delta + sample*l.outputs;
        axpy_cpu(l.outputs, 1, row, 1, l.bias_updates, 1);
    }

    if(l.batch_normalize){
        /* The helpers below run in this exact order; l.delta is rewritten in place. */
        backward_scale_cpu(l.x_norm, l.delta, l.batch, l.outputs, 1, l.scale_updates);
        scale_bias(l.delta, l.scales, l.batch, l.outputs, 1);
        mean_delta_cpu(l.delta, l.variance, l.batch, l.outputs, 1, l.mean_delta);
        variance_delta_cpu(l.x, l.delta, l.mean, l.variance, l.batch, l.outputs, 1, l.variance_delta);
        normalize_delta_cpu(l.x, l.mean, l.variance, l.mean_delta, l.variance_delta, l.batch, l.outputs, 1, l.delta);
    }

    /* l.delta (transposed flag set) with state.input, output buffer l.weight_updates. */
    gemm(1, 0, l.outputs, l.inputs, l.batch, 1,
         l.delta, l.outputs,
         state.input, l.inputs,
         1, l.weight_updates, l.inputs);

    /* l.delta with l.weights, output buffer state.delta (only when provided). */
    if(state.delta){
        gemm(0, 0, l.batch, l.inputs, l.outputs, 1,
             l.delta, l.outputs,
             l.weights, l.inputs,
             1, state.delta, l.inputs);
    }
}
Пример #2
0
/*
 * Backward pass of the detection layer.
 *
 * Walks l.batch * locations cells with two independent cursors: in_pos
 * indexes state.input / state.delta, out_pos indexes l.delta / l.output.
 * Per cell, in order:
 *   - an optional leading element (joint / objectness / background mode),
 *   - l.classes class entries,
 *   - l.coords coordinate entries.
 * In joint mode the leading input value is used as a scale for the class
 * entries and only in_pos advances for it.
 */
void backward_detection_layer(const detection_layer l, network_state state)
{
    const int locations = get_detection_layer_locations(l);
    const int total = l.batch*locations;
    int in_pos = 0;
    int out_pos = 0;
    int cell, k;

    for(cell = 0; cell < total; ++cell){
        float scale = 1;
        float latent_delta = 0;

        /* Leading element: the three modes are mutually exclusive, joint first. */
        if(l.joint){
            scale = state.input[in_pos];
            ++in_pos;
        }else if(l.objectness){
            state.delta[in_pos] += -l.delta[out_pos];
            ++in_pos;
            ++out_pos;
        }else if(l.background){
            state.delta[in_pos] += scale*l.delta[out_pos];
            ++in_pos;
            ++out_pos;
        }

        /* Class entries: scaled pass-through plus accumulation of latent_delta. */
        for(k = 0; k < l.classes; ++k){
            latent_delta += state.input[in_pos]*l.delta[out_pos];
            state.delta[in_pos] += scale*l.delta[out_pos];
            ++in_pos;
            ++out_pos;
        }

        /* gradient_array() on the coordinates only for background-without-objectness. */
        if(!l.objectness && l.background){
            gradient_array(l.output + out_pos, l.coords, LOGISTIC, l.delta + out_pos);
        }

        /* Coordinate entries are added unscaled. */
        for(k = 0; k < l.coords; ++k){
            state.delta[in_pos] += l.delta[out_pos];
            ++in_pos;
            ++out_pos;
        }

        /* Step back over coords, classes and the leading slot of this cell. */
        if(l.joint){
            state.delta[in_pos - l.coords - l.classes - l.joint] += latent_delta;
        }
    }
}
Пример #3
0
/*
 * Backward pass of a connected layer (no batch normalisation).
 *
 * Calls gradient_array() on l.delta, feeds every batch row of l.delta to
 * axpy_cpu() with l.bias_updates as the target, then issues two gemm()
 * calls: one writing to l.weight_updates, and one writing to state.delta
 * that is skipped when state.delta is NULL.
 *
 * NOTE(review): gemm() is presumed to be BLAS-like
 * (C = alpha*op(A)*op(B) + beta*C) - confirm against its definition.
 */
void backward_connected_layer(connected_layer l, network_state state)
{
    const int n_out = l.outputs;
    const int n_in = l.inputs;
    const int n_batch = l.batch;
    float *row = l.delta;
    int remaining;

    gradient_array(l.output, n_out*n_batch, l.activation, l.delta);

    /* Walk l.delta one batch row at a time. */
    for(remaining = n_batch; remaining > 0; --remaining){
        axpy_cpu(n_out, 1, row, 1, l.bias_updates, 1);
        row += n_out;
    }

    /* (n_out x n_batch, transposed flag) with (n_batch x n_in) into l.weight_updates. */
    gemm(1, 0, n_out, n_in, n_batch, 1, l.delta, n_out, state.input, n_in, 1, l.weight_updates, n_in);

    /* (n_batch x n_out) with (n_out x n_in) into state.delta, when requested. */
    if(state.delta){
        gemm(0, 0, n_batch, n_in, n_out, 1, l.delta, n_out, l.weights, n_in, 1, state.delta, n_in);
    }
}
/*
 * Backward pass of a convolutional layer (network_state variant).
 *
 * After gradient_array() and backward_bias(), each batch sample is
 * processed independently:
 *   - its input is unrolled into l.col_image with im2col_cpu();
 *   - gemm() is called with the sample's delta and l.col_image, output
 *     buffer l.filter_updates;
 *   - when state.delta is non-NULL, gemm() is called with l.filters and the
 *     sample's delta, output buffer l.col_image (beta argument 0), which
 *     col2im_cpu() then folds into the sample's slice of state.delta.
 */
void backward_convolutional_layer(convolutional_layer l, network_state state)
{
    const int filters = l.n;
    const int patch_len = l.size*l.size*l.c;
    const int out_spatial = convolutional_out_height(l)*
        convolutional_out_width(l);
    int sample;

    gradient_array(l.output, filters*out_spatial*l.batch, l.activation, l.delta);
    backward_bias(l.bias_updates, l.delta, l.batch, l.n, out_spatial);

    for(sample = 0; sample < l.batch; ++sample){
        float *sample_delta = l.delta + sample*filters*out_spatial;
        float *sample_input = state.input+sample*l.c*l.h*l.w;

        im2col_cpu(sample_input, l.c, l.h, l.w,
                l.size, l.stride, l.pad, l.col_image);
        gemm(0, 1, filters, patch_len, out_spatial, 1,
             sample_delta, out_spatial,
             l.col_image, out_spatial,
             1, l.filter_updates, patch_len);

        if(!state.delta){
            continue;
        }

        /* l.col_image is reused as scratch for the propagated gradient. */
        gemm(1, 0, patch_len, out_spatial, filters, 1,
             l.filters, patch_len,
             sample_delta, out_spatial,
             0, l.col_image, out_spatial);

        col2im_cpu(l.col_image, l.c, l.h, l.w, l.size, l.stride, l.pad,
                state.delta+sample*l.c*l.h*l.w);
    }
}
Пример #5
0
/*
 * Backward pass of the compact layer.
 *
 * After gradient_array(), each batch row of l.delta (l.outputs floats) is
 * passed to axpy_cpu() l.index times, targeting consecutive l.outputs-sized
 * chunks of that sample's slice of state.delta (slice stride l.inputs).
 *
 * state.delta is used unconditionally; there is no NULL check here.
 */
void backward_compact_layer(const layer l, network_state state)
{
    int sample;
    int chunk;

    gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);

    for(sample = 0; sample < l.batch; ++sample){
        float *src = l.delta+sample*l.outputs;
        float *dst_base = state.delta+sample*l.inputs;

        for(chunk = 0; chunk < l.index; ++chunk){
            axpy_cpu(l.outputs, 1, src, 1, dst_base+chunk*l.outputs, 1);
        }
    }
}
Пример #6
0
/*
 * Backward pass of a locally-connected layer, where every output location
 * owns its own block of filters (block size l.size*l.size*l.c*l.n).
 *
 * Order of work:
 *   1. gradient_array() on l.delta.
 *   2. Every batch row of l.delta goes through axpy_cpu() into l.bias_updates.
 *   3. Per sample: im2col_cpu() into l.col_image, then one gemm() per
 *      location writing to that location's block of l.filter_updates.
 *   4. Per sample, only when state.delta is non-NULL: one gemm() per
 *      location writing back into l.col_image (beta argument 0), followed by
 *      col2im_cpu() into the sample's slice of state.delta.
 */
void backward_local_layer(local_layer l, network_state state)
{
    const int locations = l.out_w*l.out_h;
    const int patch_len = l.size*l.size*l.c;
    const int block_len = l.size*l.size*l.c*l.n;
    int sample, loc;

    gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);

    for(sample = 0; sample < l.batch; ++sample){
        axpy_cpu(l.outputs, 1, l.delta + sample*l.outputs, 1, l.bias_updates, 1);
    }

    for(sample = 0; sample < l.batch; ++sample){
        float *sample_delta = l.delta + sample*l.outputs;

        im2col_cpu(state.input + sample*l.w*l.h*l.c, l.c, l.h, l.w,
                l.size, l.stride, l.pad, l.col_image);

        /* Column `loc` of delta and of col_image is addressed with stride `locations`. */
        for(loc = 0; loc < locations; ++loc){
            gemm(0, 1, l.n, patch_len, 1, 1,
                 sample_delta + loc, locations,
                 l.col_image + loc, locations,
                 1, l.filter_updates + loc*block_len, patch_len);
        }

        if(!state.delta){
            continue;
        }

        for(loc = 0; loc < locations; ++loc){
            gemm(1, 0, patch_len, 1, l.n, 1,
                 l.filters + loc*block_len, patch_len,
                 sample_delta + loc, locations,
                 0, l.col_image + loc, locations);
        }

        col2im_cpu(l.col_image, l.c, l.h, l.w, l.size, l.stride, l.pad,
                state.delta+sample*l.c*l.h*l.w);
    }
}
Пример #7
0
/*
 * Backward pass of a deconvolutional layer (network_state variant).
 *
 * l     - layer whose delta / update buffers are read and written.
 * state - supplies state.input, state.workspace and, optionally,
 *         state.delta (the input-gradient gemm is skipped when it is NULL).
 *
 * Per batch sample the layer's delta is unrolled into state.workspace with
 * im2col_cpu(); that buffer is then used for both gemm() calls.
 *
 * Fix: the per-sample input pointer previously advanced by
 * l.c * (l.size*l.size*l.n) floats.  The gemm() call reads that pointer as an
 * (l.c) x (l.h*l.w) matrix with leading dimension l.h*l.w, so one sample
 * occupies l.c*l.h*l.w floats and that is the correct stride.  With the old
 * stride every sample after the first read the wrong region of state.input.
 *
 * NOTE(review): gemm() is presumed to be BLAS-like
 * (C = alpha*op(A)*op(B) + beta*C) - confirm against its definition.
 */
void backward_deconvolutional_layer(layer l, network_state state)
{
    float alpha = 1./l.batch;
    int out_h = deconvolutional_out_height(l);
    int out_w = deconvolutional_out_width(l);
    int size = out_h*out_w;
    int i;

    /* Dimensions shared by both gemm() calls. */
    int in_channels = l.c;
    int kernel_len = l.size*l.size*l.n;
    int in_spatial = l.h*l.w;

    gradient_array(l.output, size*l.n*l.batch, l.activation, l.delta);
    if(l.batch_normalize){
        backward_batchnorm_layer(l, state);
    } else {
        backward_bias(l.bias_updates, l.delta, l.batch, l.n, l.out_w*l.out_h);
    }

    for(i = 0; i < l.batch; ++i){
        /* One input sample is in_channels x in_spatial floats. */
        float *sample_input = state.input + i*in_channels*in_spatial;
        float *col = state.workspace;

        im2col_cpu(l.delta + i*l.n*size, l.n, out_h, out_w,
                l.size, l.stride, 0, col);

        /* Weight gradient, scaled by alpha = 1/batch. */
        gemm(0,1,in_channels,kernel_len,in_spatial,alpha,
             sample_input,in_spatial,col,in_spatial,1,l.weight_updates,kernel_len);

        if(state.delta){
            float *sample_delta = state.delta + i*in_spatial*in_channels;

            /* Input gradient for this sample, written to state.delta. */
            gemm(0,0,in_channels,in_spatial,kernel_len,1,
                 l.weights,kernel_len,col,in_spatial,1,sample_delta,in_spatial);
        }
    }
}
/*
 * Backward pass of a deconvolutional layer (network variant).
 *
 * After gradient_array() and either backward_batchnorm_layer() or
 * backward_bias(), each batch sample is handled in turn:
 *   - its slice of l.delta is unrolled into net.workspace by im2col_cpu();
 *   - gemm_cpu() is called with the sample's input and net.workspace,
 *     output buffer l.weight_updates;
 *   - when net.delta is non-NULL, gemm_cpu() is called with l.weights and
 *     net.workspace, output buffer the sample's slice of net.delta.
 *
 * net.delta is not cleared here; both gemm_cpu() calls pass 1 as the beta
 * argument.
 */
void backward_deconvolutional_layer(layer l, network net)
{
    const int in_channels = l.c;
    const int kernel_len = l.size*l.size*l.n;
    const int in_spatial = l.h*l.w;
    int sample;

    gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);

    if(l.batch_normalize){
        backward_batchnorm_layer(l, net);
    } else {
        backward_bias(l.bias_updates, l.delta, l.batch, l.n, l.out_w*l.out_h);
    }

    //if(net.delta) memset(net.delta, 0, l.batch*l.h*l.w*l.c*sizeof(float));

    for(sample = 0; sample < l.batch; ++sample){
        float *sample_input = net.input + sample*in_channels*in_spatial;

        im2col_cpu(l.delta + sample*l.outputs, l.out_c, l.out_h, l.out_w,
                l.size, l.stride, l.pad, net.workspace);
        gemm_cpu(0, 1, in_channels, kernel_len, in_spatial, 1,
                 sample_input, in_spatial,
                 net.workspace, in_spatial,
                 1, l.weight_updates, kernel_len);

        if(net.delta){
            float *sample_delta = net.delta + sample*in_spatial*in_channels;

            gemm_cpu(0, 0, in_channels, in_spatial, kernel_len, 1,
                     l.weights, kernel_len,
                     net.workspace, in_spatial,
                     1, sample_delta, in_spatial);
        }
    }
}
Пример #9
0
/*
 * Backward pass of a grouped convolutional layer (network variant).
 *
 * After gradient_array() and either backward_batchnorm_layer() or
 * backward_bias(), every (sample, group) pair is processed on its own:
 *   - the group's input channels are unrolled into net.workspace;
 *   - gemm() is called with the group's delta and net.workspace, output
 *     buffer that group's section of l.weight_updates;
 *   - when net.delta is non-NULL, gemm() is called with the group's weights
 *     and delta, output buffer net.workspace (beta argument 0), which
 *     col2im_cpu() then folds into the matching slice of net.delta.
 *
 * The offset expressions use integer division and are kept in their original
 * left-to-right form so that rounding is unchanged.
 */
void backward_convolutional_layer(convolutional_layer l, network net)
{
    const int group_filters = l.n/l.groups;
    const int patch_len = l.size*l.size*l.c/l.groups;
    const int out_spatial = l.out_w*l.out_h;
    int sample, group;

    gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);

    if(l.batch_normalize){
        backward_batchnorm_layer(l, net);
    } else {
        backward_bias(l.bias_updates, l.delta, l.batch, l.n, out_spatial);
    }

    for(sample = 0; sample < l.batch; ++sample){
        for(group = 0; group < l.groups; ++group){
            const int slot = sample*l.groups + group;
            float *group_delta = l.delta + slot*group_filters*out_spatial;
            float *group_input = net.input+slot*l.c/l.groups*l.h*l.w;

            im2col_cpu(group_input, l.c/l.groups, l.h, l.w,
                    l.size, l.stride, l.pad, net.workspace);
            gemm(0, 1, group_filters, patch_len, out_spatial, 1,
                 group_delta, out_spatial,
                 net.workspace, out_spatial,
                 1, l.weight_updates + group*l.nweights/l.groups, patch_len);

            if(!net.delta){
                continue;
            }

            /* net.workspace is overwritten with the column-form input gradient. */
            gemm(1, 0, patch_len, out_spatial, group_filters, 1,
                 l.weights + group*l.nweights/l.groups, patch_len,
                 group_delta, out_spatial,
                 0, net.workspace, out_spatial);

            col2im_cpu(net.workspace, l.c/l.groups, l.h, l.w, l.size, l.stride,
                l.pad, net.delta + slot*l.c/l.groups*l.h*l.w);
        }
    }
}
Пример #10
0
/*
 * Backward pass of a shortcut layer.
 *
 * Applies gradient_array() to l.delta, then hands l.delta to two targets:
 * net.delta (via axpy_cpu over all l.outputs*l.batch elements) and the delta
 * buffer of the layer at net.layers[l.index] (via shortcut_cpu).
 */
void backward_shortcut_layer(const layer l, network net)
{
    const int total = l.outputs*l.batch;
    float *linked_delta = net.layers[l.index].delta;

    gradient_array(l.output, total, l.activation, l.delta);

    axpy_cpu(total, 1, l.delta, 1, net.delta, 1);

    shortcut_cpu(l.batch, l.out_w, l.out_h, l.out_c, l.delta,
                 l.w, l.h, l.c, linked_delta);
}
Пример #11
0
/*
 * Backward pass of the LSTM layer, iterating over time steps from
 * l.steps - 1 down to 0.
 *
 * The eight sub-layers (wf/wi/wg/wo and uf/ui/ug/uo) are copied by value and
 * moved to the last step with increment_layer(); state.input, state.delta,
 * l.output, l.cell_cpu and l.delta are advanced the same way.  At the end of
 * every iteration all of them are stepped back by one time step.
 *
 * Within a step the w* layers are run backward against l.prev_state_cpu /
 * l.dh_cpu and the u* layers against state.input / state.delta, in the order
 * o, g, i, f.  l.dh_cpu is 0 (NULL) on step 0.
 *
 * On step 0 l.prev_cell_cpu and l.prev_state_cpu are not refreshed; they keep
 * whatever they held before that iteration.
 */
void backward_lstm_layer(layer l, network state) {
	const int out_len = l.outputs * l.batch;	/* elements per step, output-sized buffers */
	const int in_len = l.inputs * l.batch;		/* elements per step, input-sized buffers */
	const int last_step = l.steps - 1;
	int step;

	network s = { 0 };
	s.train = state.train;

	layer wf = *(l.wf);
	layer wi = *(l.wi);
	layer wg = *(l.wg);
	layer wo = *(l.wo);

	layer uf = *(l.uf);
	layer ui = *(l.ui);
	layer ug = *(l.ug);
	layer uo = *(l.uo);

	/* Position every sub-layer and buffer on the final time step. */
	increment_layer(&wf, last_step);
	increment_layer(&wi, last_step);
	increment_layer(&wg, last_step);
	increment_layer(&wo, last_step);

	increment_layer(&uf, last_step);
	increment_layer(&ui, last_step);
	increment_layer(&ug, last_step);
	increment_layer(&uo, last_step);

	state.input += in_len * last_step;
	if (state.delta)
		state.delta += in_len * last_step;

	l.output += out_len * last_step;
	l.cell_cpu += out_len * last_step;
	l.delta += out_len * last_step;

	for (step = last_step; step >= 0; --step) {
		/* Snapshot current and previous cell / hidden state. */
		if (step != 0)
			copy_cpu(out_len, l.cell_cpu - out_len, 1, l.prev_cell_cpu, 1);
		copy_cpu(out_len, l.cell_cpu, 1, l.c_cpu, 1);
		if (step != 0)
			copy_cpu(out_len, l.output - out_len, 1, l.prev_state_cpu, 1);
		copy_cpu(out_len, l.output, 1, l.h_cpu, 1);

		l.dh_cpu = (step == 0) ? 0 : l.delta - out_len;

		/* Rebuild the four gate buffers from the w* and u* outputs. */
		copy_cpu(out_len, wf.output, 1, l.f_cpu, 1);
		axpy_cpu(out_len, 1, uf.output, 1, l.f_cpu, 1);

		copy_cpu(out_len, wi.output, 1, l.i_cpu, 1);
		axpy_cpu(out_len, 1, ui.output, 1, l.i_cpu, 1);

		copy_cpu(out_len, wg.output, 1, l.g_cpu, 1);
		axpy_cpu(out_len, 1, ug.output, 1, l.g_cpu, 1);

		copy_cpu(out_len, wo.output, 1, l.o_cpu, 1);
		axpy_cpu(out_len, 1, uo.output, 1, l.o_cpu, 1);

		activate_array(l.f_cpu, out_len, LOGISTIC);
		activate_array(l.i_cpu, out_len, LOGISTIC);
		activate_array(l.g_cpu, out_len, TANH);
		activate_array(l.o_cpu, out_len, LOGISTIC);

		/* temp3 holds this step's delta; temp2 is built from it, o, tanh(c) and dc. */
		copy_cpu(out_len, l.delta, 1, l.temp3_cpu, 1);

		copy_cpu(out_len, l.c_cpu, 1, l.temp_cpu, 1);
		activate_array(l.temp_cpu, out_len, TANH);

		copy_cpu(out_len, l.temp3_cpu, 1, l.temp2_cpu, 1);
		mul_cpu(out_len, l.o_cpu, 1, l.temp2_cpu, 1);

		gradient_array(l.temp_cpu, out_len, TANH, l.temp2_cpu);
		axpy_cpu(out_len, 1, l.dc_cpu, 1, l.temp2_cpu, 1);

		/* o gate: wo against the previous hidden state, uo against the input. */
		copy_cpu(out_len, l.c_cpu, 1, l.temp_cpu, 1);
		activate_array(l.temp_cpu, out_len, TANH);
		mul_cpu(out_len, l.temp3_cpu, 1, l.temp_cpu, 1);
		gradient_array(l.o_cpu, out_len, LOGISTIC, l.temp_cpu);
		copy_cpu(out_len, l.temp_cpu, 1, wo.delta, 1);
		s.input = l.prev_state_cpu;
		s.delta = l.dh_cpu;
		backward_connected_layer(wo, s);

		copy_cpu(out_len, l.temp_cpu, 1, uo.delta, 1);
		s.input = state.input;
		s.delta = state.delta;
		backward_connected_layer(uo, s);

		/* g gate. */
		copy_cpu(out_len, l.temp2_cpu, 1, l.temp_cpu, 1);
		mul_cpu(out_len, l.i_cpu, 1, l.temp_cpu, 1);
		gradient_array(l.g_cpu, out_len, TANH, l.temp_cpu);
		copy_cpu(out_len, l.temp_cpu, 1, wg.delta, 1);
		s.input = l.prev_state_cpu;
		s.delta = l.dh_cpu;
		backward_connected_layer(wg, s);

		copy_cpu(out_len, l.temp_cpu, 1, ug.delta, 1);
		s.input = state.input;
		s.delta = state.delta;
		backward_connected_layer(ug, s);

		/* i gate. */
		copy_cpu(out_len, l.temp2_cpu, 1, l.temp_cpu, 1);
		mul_cpu(out_len, l.g_cpu, 1, l.temp_cpu, 1);
		gradient_array(l.i_cpu, out_len, LOGISTIC, l.temp_cpu);
		copy_cpu(out_len, l.temp_cpu, 1, wi.delta, 1);
		s.input = l.prev_state_cpu;
		s.delta = l.dh_cpu;
		backward_connected_layer(wi, s);

		copy_cpu(out_len, l.temp_cpu, 1, ui.delta, 1);
		s.input = state.input;
		s.delta = state.delta;
		backward_connected_layer(ui, s);

		/* f gate. */
		copy_cpu(out_len, l.temp2_cpu, 1, l.temp_cpu, 1);
		mul_cpu(out_len, l.prev_cell_cpu, 1, l.temp_cpu, 1);
		gradient_array(l.f_cpu, out_len, LOGISTIC, l.temp_cpu);
		copy_cpu(out_len, l.temp_cpu, 1, wf.delta, 1);
		s.input = l.prev_state_cpu;
		s.delta = l.dh_cpu;
		backward_connected_layer(wf, s);

		copy_cpu(out_len, l.temp_cpu, 1, uf.delta, 1);
		s.input = state.input;
		s.delta = state.delta;
		backward_connected_layer(uf, s);

		/* dc for the next (earlier) step is built from temp2 and f. */
		copy_cpu(out_len, l.temp2_cpu, 1, l.temp_cpu, 1);
		mul_cpu(out_len, l.f_cpu, 1, l.temp_cpu, 1);
		copy_cpu(out_len, l.temp_cpu, 1, l.dc_cpu, 1);

		/* Move everything back by one time step. */
		state.input -= in_len;
		if (state.delta)
			state.delta -= in_len;
		l.output -= out_len;
		l.cell_cpu -= out_len;
		l.delta -= out_len;

		increment_layer(&wf, -1);
		increment_layer(&wi, -1);
		increment_layer(&wg, -1);
		increment_layer(&wo, -1);

		increment_layer(&uf, -1);
		increment_layer(&ui, -1);
		increment_layer(&ug, -1);
		increment_layer(&uo, -1);
	}
}
Пример #12
0
/*
 * Forward pass of the detection layer, plus loss / delta computation when
 * state.train is set.
 *
 * The input is copied verbatim into l.output.  As indexed below, each batch
 * item's slice of l.output (stride l.inputs) is laid out as:
 *   [ locations * l.classes class scores           ]
 *   [ locations * l.n       per-box object scores  ]
 *   [ locations * l.n * l.coords box coordinates   ]
 * and state.truth holds, per location, (1 + l.coords + l.classes) floats:
 *   [ is_obj | l.classes class targets | box ].
 *
 * In training mode l.delta is zeroed and refilled, and *(l.cost) is reset
 * and accumulated.  The running averages are only used for the optional
 * debug printout.
 */
void forward_detection_layer(const detection_layer l, network_state state)
{
    int locations = l.side*l.side;
    int i,j;
    memcpy(l.output, state.input, l.outputs*l.batch*sizeof(float));
    int b;
    if (l.softmax){
        // Softmax over each location's class scores, then LOGISTIC over the
        // remaining object-score and coordinate entries of the batch item.
        for(b = 0; b < l.batch; ++b){
            int index = b*l.inputs;
            for (i = 0; i < locations; ++i) {
                int offset = i*l.classes;
                softmax_array(l.output + index + offset, l.classes, 1,
                        l.output + index + offset);
            }
            int offset = locations*l.classes;
            activate_array(l.output + index + offset, locations*l.n*(1+l.coords), LOGISTIC);
        }
    }
    if(state.train){
        float avg_iou = 0;
        float avg_cat = 0;
        float avg_allcat = 0;
        float avg_obj = 0;
        float avg_anyobj = 0;
        int count = 0;
        *(l.cost) = 0;
        int size = l.inputs * l.batch;
        memset(l.delta, 0, size * sizeof(float));
        for (b = 0; b < l.batch; ++b){
            int index = b*l.inputs;
            for (i = 0; i < locations; ++i) {
                int truth_index = (b*locations + i)*(1+l.coords+l.classes);
                int is_obj = state.truth[truth_index];
                // Every box at this location is first penalised as "no object";
                // the chosen box is corrected further down.
                for (j = 0; j < l.n; ++j) {
                    int p_index = index + locations*l.classes + i*l.n + j;
                    l.delta[p_index] = l.noobject_scale*(0 - l.output[p_index]);
                    *(l.cost) += l.noobject_scale*pow(l.output[p_index], 2);
                    avg_anyobj += l.output[p_index];
                }

                int best_index = -1;
                float best_iou = 0;
                float best_rmse = 20;

                // Locations without a ground-truth object contribute nothing more.
                if (!is_obj){
                    continue;
                }

                // Class deltas and squared-error class cost for this location.
                int class_index = index + i*l.classes;
                for(j = 0; j < l.classes; ++j) {
                    l.delta[class_index+j] = l.class_scale * (state.truth[truth_index+1+j] - l.output[class_index+j]);
                    *(l.cost) += l.class_scale * pow(state.truth[truth_index+1+j] - l.output[class_index+j], 2);
                    if(state.truth[truth_index + 1 + j]) avg_cat += l.output[class_index+j];
                    avg_allcat += l.output[class_index+j];
                }

                box truth = float_to_box(state.truth + truth_index + 1 + l.classes);
                truth.x /= l.side;
                truth.y /= l.side;

                // Pick the responsible box: highest IOU once any IOU is positive,
                // otherwise the lowest RMSE (which must be below the initial 20).
                for(j = 0; j < l.n; ++j){
                    int box_index = index + locations*(l.classes + l.n) + (i*l.n + j) * l.coords;
                    box out = float_to_box(l.output + box_index);
                    out.x /= l.side;
                    out.y /= l.side;

                    // With l.sqrt set, the stored w/h are squared before comparison.
                    if (l.sqrt){
                        out.w = out.w*out.w;
                        out.h = out.h*out.h;
                    }

                    float iou  = box_iou(out, truth);
                    //iou = 0;
                    float rmse = box_rmse(out, truth);
                    if(best_iou > 0 || iou > 0){
                        if(iou > best_iou){
                            best_iou = iou;
                            best_index = j;
                        }
                    }else{
                        if(rmse < best_rmse){
                            best_rmse = rmse;
                            best_index = j;
                        }
                    }
                }

                // l.forced overrides the search: box 1 for small truth boxes, else box 0.
                if(l.forced){
                    if(truth.w*truth.h < .1){
                        best_index = 1;
                    }else{
                        best_index = 0;
                    }
                }

                // NOTE(review): best_index is still -1 here if no box had iou > 0 or
                // rmse < 20 and l.forced is off; the indices below would then refer
                // to the slot before this location's first box - confirm this cannot
                // happen in practice.
                int box_index = index + locations*(l.classes + l.n) + (i*l.n + best_index) * l.coords;
                int tbox_index = truth_index + 1 + l.classes;

                box out = float_to_box(l.output + box_index);
                out.x /= l.side;
                out.y /= l.side;
                if (l.sqrt) {
                    out.w = out.w*out.w;
                    out.h = out.h*out.h;
                }
                float iou  = box_iou(out, truth);

                //printf("%d,", best_index);
                // Replace the "no object" penalty of the chosen box with the object term.
                int p_index = index + locations*l.classes + i*l.n + best_index;
                *(l.cost) -= l.noobject_scale * pow(l.output[p_index], 2);
                *(l.cost) += l.object_scale * pow(1-l.output[p_index], 2);
                avg_obj += l.output[p_index];
                l.delta[p_index] = l.object_scale * (1.-l.output[p_index]);

                // With l.rescore set, the object-score target is the IOU instead of 1.
                if(l.rescore){
                    l.delta[p_index] = l.object_scale * (iou - l.output[p_index]);
                }

                // Coordinate deltas; indices 2 and 3 are overwritten below when l.sqrt is set.
                l.delta[box_index+0] = l.coord_scale*(state.truth[tbox_index + 0] - l.output[box_index + 0]);
                l.delta[box_index+1] = l.coord_scale*(state.truth[tbox_index + 1] - l.output[box_index + 1]);
                l.delta[box_index+2] = l.coord_scale*(state.truth[tbox_index + 2] - l.output[box_index + 2]);
                l.delta[box_index+3] = l.coord_scale*(state.truth[tbox_index + 3] - l.output[box_index + 3]);
                if(l.sqrt){
                    l.delta[box_index+2] = l.coord_scale*(sqrt(state.truth[tbox_index + 2]) - l.output[box_index + 2]);
                    l.delta[box_index+3] = l.coord_scale*(sqrt(state.truth[tbox_index + 3]) - l.output[box_index + 3]);
                }

                *(l.cost) += pow(1-iou, 2);
                avg_iou += iou;
                ++count;
            }
            // Mirrors the LOGISTIC activation applied in the softmax branch above.
            if(l.softmax){
                gradient_array(l.output + index + locations*l.classes, locations*l.n*(1+l.coords), 
                        LOGISTIC, l.delta + index + locations*l.classes);
            }
        }
        // NOTE(review): count is 0 when no location had an object, so the averages
        // below divide by zero in that case.
        if ( l.b_debug )
        {
            printf("Detection Avg IOU: %f, Pos Cat: %f, All Cat: %f, Pos Obj: %f, Any Obj: %f, count: %d\n", avg_iou/count, avg_cat/count, avg_allcat/(count*l.classes), avg_obj/count, avg_anyobj/(l.batch*locations*l.n), count);
        }
    }
}
/*
 * Backward pass of an activation layer: applies gradient_array() to l.delta
 * using l.output and l.activation, then copies the whole l.delta buffer
 * (l.outputs * l.batch elements) into net.delta with copy_cpu().
 */
void backward_activation_layer(layer l, network net) {
	const int total = l.outputs * l.batch;

	gradient_array(l.output, total, l.activation, l.delta);
	copy_cpu(total, l.delta, 1, net.delta, 1);
}