void DeConvolutionLayer::col2im(Mat &dstMat)
{
    // 1x1 kernels need no column buffer: the GEMM output already is the image.
    if (is1x1()) return;

    // Scatter the column buffer back into the destination image, dispatching on element type.
    if (dstMat.type() == CV_32F)
        col2im_cpu((float*)colMat.ptr(), inpGroupCn, inpH, inpW, kerH, kerW, padH, padW, strideH, strideW, (float*)dstMat.ptr());
    else if (dstMat.type() == CV_64F)
        col2im_cpu((double*)colMat.ptr(), inpGroupCn, inpH, inpW, kerH, kerW, padH, padW, strideH, strideW, (double*)dstMat.ptr());
}
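
Every example on this page bottoms out in some variant of col2im_cpu, which scatter-adds a column buffer back into image layout, summing wherever receptive fields overlap. For reference, here is a minimal sketch of the classic 2-D kernel in the Caffe-style signature these snippets use; the name col2im_cpu_sketch and the explicit std::fill zeroing are illustrative choices rather than any one project's exact code, and dilation is omitted.

#include <algorithm>

template <typename Dtype>
void col2im_cpu_sketch(const Dtype* data_col, const int channels,
    const int height, const int width, const int kernel_h, const int kernel_w,
    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
    Dtype* data_im) {
  const int out_h = (height + 2 * pad_h - kernel_h) / stride_h + 1;
  const int out_w = (width + 2 * pad_w - kernel_w) / stride_w + 1;
  // The image must start zeroed: overlapping patches accumulate into it.
  std::fill(data_im, data_im + channels * height * width, Dtype(0));
  const int channels_col = channels * kernel_h * kernel_w;
  for (int c = 0; c < channels_col; ++c) {
    const int w_offset = c % kernel_w;
    const int h_offset = (c / kernel_w) % kernel_h;
    const int c_im = c / kernel_w / kernel_h;
    for (int h = 0; h < out_h; ++h) {
      for (int w = 0; w < out_w; ++w) {
        const int h_pad = h * stride_h - pad_h + h_offset;
        const int w_pad = w * stride_w - pad_w + w_offset;
        if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
          data_im[(c_im * height + h_pad) * width + w_pad] +=
              data_col[(c * out_h + h) * out_w + w];
      }
    }
  }
}

The gather-side counterpart, im2col, appears further down (see the sketch after Example 5).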
Example 2
	void conv_col2im_cpu(const Dtype* col_buff, Dtype* data){
		// 2-D fast path only: this variant silently skips N-D shapes
		// (compare Example 11, which falls back to col2im_nd_cpu).
		if (!force_nd_im2col && num_spatial_axes == 2){
			col2im_cpu(col_buff, conv_in_channels, conv_input_shape.cpu_data()[1], conv_input_shape.cpu_data()[2],
				kernel_shape.cpu_data()[0], kernel_shape.cpu_data()[1], pad.cpu_data()[0], pad.cpu_data()[1],
				stride.cpu_data()[0], stride.cpu_data()[1], data);
		}
	}
Example 3
void forward_deconvolutional_layer(const layer l, network net) {
	int i;

	// GEMM shape: (size*size*n) x (h*w), reduced over the c input channels.
	int m = l.size * l.size * l.n;
	int n = l.h * l.w;
	int k = l.c;

	fill_cpu(l.outputs * l.batch, 0, l.output, 1);

	for (i = 0; i < l.batch; ++i) {
		real_t *a = l.weights;
		real_t *b = net.input + i * l.c * l.h * l.w;
		real_t *c = net.workspace;

		// columns = W^T * input; col2im_cpu below scatter-adds them into the output map.
		gemm_cpu(1, 0, m, n, k, 1, a, m, b, n, 0, c, n);

		col2im_cpu(net.workspace, l.out_c, l.out_h, l.out_w, l.size, l.stride,
				l.pad, l.output + i * l.outputs);
	}
	if (l.batch_normalize) {
		forward_batchnorm_layer(l, net);
	} else {
		add_bias(l.output, l.biases, l.batch, l.n, l.out_w * l.out_h);
	}
	activate_array(l.output, l.batch * l.n * l.out_w * l.out_h, l.activation);
}
Example 4
void forward_deconvolutional_layer(const layer l, network_state state)
{
    int i;
    int out_h = l.out_h;
    int out_w = l.out_w;
    int size = out_h*out_w;

    int m = l.size*l.size*l.n;
    int n = l.h*l.w;
    int k = l.c;

    fill_cpu(l.outputs*l.batch, 0, l.output, 1);

    for(i = 0; i < l.batch; ++i){
        float *a = l.weights;
        float *b = state.input + i*l.c*l.h*l.w;
        float *c = state.workspace;

        gemm(1,0,m,n,k,1,a,m,b,n,0,c,n);

        col2im_cpu(c, l.n, out_h, out_w, l.size, l.stride, 0, l.output+i*l.n*size);
    }

    if(l.batch_normalize){
        forward_batchnorm_layer(l, state);
    } else {
        add_bias(l.output, l.biases, l.batch, l.n, l.out_h*l.out_w);
    }
    activate_array(l.output, l.batch*l.n*size, l.activation);
}
Example 5
void backward_convolutional_layer(convolutional_layer l, network_state state)
{
    int i;
    int m = l.n;
    int n = l.size*l.size*l.c;
    int k = convolutional_out_height(l)*
        convolutional_out_width(l);

    gradient_array(l.output, m*k*l.batch, l.activation, l.delta);
    backward_bias(l.bias_updates, l.delta, l.batch, l.n, k);

    for(i = 0; i < l.batch; ++i){
        float *a = l.delta + i*m*k;
        float *b = l.col_image;
        float *c = l.filter_updates;

        float *im = state.input+i*l.c*l.h*l.w;

        // Recompute the column view of the input, then accumulate dW += delta * columns^T.
        im2col_cpu(im, l.c, l.h, l.w,
                l.size, l.stride, l.pad, b);
        gemm(0,1,m,n,k,1,a,k,b,k,1,c,n);

        if(state.delta){
            a = l.filters;
            b = l.delta + i*m*k;
            c = l.col_image;

            // dx columns = W^T * delta, then scatter back to image layout.
            gemm(1,0,n,k,m,1,a,n,b,k,0,c,k);

            col2im_cpu(l.col_image, l.c, l.h, l.w, l.size, l.stride, l.pad, state.delta+i*l.c*l.h*l.w);
        }
    }
}
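
The backward passes above rebuild the column buffer on the fly with im2col_cpu before the weight-gradient GEMM. It is the exact gather-side mirror of the col2im sketch near the top of this page: the same index walk, but copying from the image into the column buffer and zero-filling where the window overhangs the padding. A minimal sketch under the same assumptions (the name im2col_cpu_sketch is illustrative):

template <typename Dtype>
void im2col_cpu_sketch(const Dtype* data_im, const int channels,
    const int height, const int width, const int kernel_h, const int kernel_w,
    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
    Dtype* data_col) {
  const int out_h = (height + 2 * pad_h - kernel_h) / stride_h + 1;
  const int out_w = (width + 2 * pad_w - kernel_w) / stride_w + 1;
  const int channels_col = channels * kernel_h * kernel_w;
  for (int c = 0; c < channels_col; ++c) {
    const int w_offset = c % kernel_w;
    const int h_offset = (c / kernel_w) % kernel_h;
    const int c_im = c / kernel_w / kernel_h;
    for (int h = 0; h < out_h; ++h) {
      for (int w = 0; w < out_w; ++w) {
        const int h_pad = h * stride_h - pad_h + h_offset;
        const int w_pad = w * stride_w - pad_w + w_offset;
        // Copy the pixel if it lies inside the image, else write a padding zero.
        data_col[(c * out_h + h) * out_w + w] =
            (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
                ? data_im[(c_im * height + h_pad) * width + w_pad]
                : Dtype(0);
      }
    }
  }
}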
Example 6
void ConvolutionLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom) {
  const Dtype* weight = this->blobs_[0]->cpu_data();
  Dtype* weight_diff = this->blobs_[0]->mutable_cpu_diff();
  caffe_set(this->blobs_[0]->count(), Dtype(0), weight_diff);
  Dtype* bias_diff = NULL;
  if (bias_term_) {
    bias_diff = this->blobs_[1]->mutable_cpu_diff();
    caffe_set(this->blobs_[1]->count(), Dtype(0), bias_diff);
  }
  const int weight_offset = M_ * K_;
  const int col_offset = K_ * N_;
  const int top_offset = M_ * N_;
  for (int i = 0; i < top.size(); ++i) {
    const Dtype* top_diff = top[i]->cpu_diff();
    const Dtype* bottom_data = (*bottom)[i]->cpu_data();
    Dtype* bottom_diff = (*bottom)[i]->mutable_cpu_diff();
    Dtype* col_data = col_buffer_.mutable_cpu_data();
    Dtype* col_diff = col_buffer_.mutable_cpu_diff();

    // Bias gradient, if necessary.
    if (bias_term_) {
      for (int n = 0; n < num_; ++n) {
        caffe_cpu_gemv<Dtype>(CblasNoTrans, num_output_, N_,
            1., top_diff + top[0]->offset(n),
            static_cast<const Dtype*>(bias_multiplier_->cpu_data()), 1.,
            bias_diff);
      }
    }
    for (int n = 0; n < num_; ++n) {
      // Since we saved memory in the forward pass by not storing all col data,
      // we will need to recompute them.
      im2col_cpu(bottom_data + (*bottom)[i]->offset(n), channels_, height_,
                 width_, depth_, kernel_h_, kernel_w_, kernel_d_,
                 pad_h_, pad_w_, pad_d_,
                 stride_h_, stride_w_, stride_d_,
                 col_data);
      // gradient w.r.t. weight. Note that we will accumulate diffs.
      for (int g = 0; g < group_; ++g) {
        caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_,
          (Dtype)1., top_diff + top[i]->offset(n) + top_offset * g,
          col_data + col_offset * g, (Dtype)1.,
          weight_diff + weight_offset * g);
      }
      // gradient w.r.t. bottom data, if necessary
      if (propagate_down[i]) {
        for (int g = 0; g < group_; ++g) {
          caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_,
            (Dtype)1., weight + weight_offset * g,
            top_diff + top[i]->offset(n) + top_offset * g,
            (Dtype)0., col_diff + col_offset * g);
        }
        // col2im back to the data
        col2im_cpu(col_diff, channels_, height_, width_, depth_,
                   kernel_h_, kernel_w_, kernel_d_,
                   pad_h_, pad_w_, pad_d_, stride_h_, stride_w_, stride_d_,
                   bottom_diff + (*bottom)[i]->offset(n));
      }
    }
  }
}
Example 7
void Im2colLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  // This fork's col2im_cpu takes the whole batch (top[0]->num()) and
  // hole (dilation) factors in a single call.
  col2im_cpu(top[0]->cpu_diff(),
      top[0]->num(), channels_, height_, width_,
      kernel_h_, kernel_w_, pad_h_, pad_w_,
      stride_h_, stride_w_, hole_h_, hole_w_,
      bottom[0]->mutable_cpu_diff());
}
Example 8
void Im2colLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
      const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
  const Dtype* top_diff = top[0]->cpu_diff();
  Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
  for (int n = 0; n < top[0]->num(); ++n) {
    col2im_cpu(top_diff + top[0]->offset(n), channels_, height_, width_,
        kernel_size_, pad_, stride_, bottom_diff + (*bottom)[0]->offset(n));
  }
}
Example 9
Dtype Im2colLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
		const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
	const Dtype* top_diff = top[0]->cpu_diff();
	Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
	for (int n = 0; n < top[0]->num(); ++n) {
		col2im_cpu(top_diff + top[0]->offset(n), CHANNELS_, HEIGHT_, WIDTH_,
				KSIZE_, STRIDE_, bottom_diff + (*bottom)[0]->offset(n));
	}
	return Dtype(0.);
}
Example 10
void Im2colLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  const Dtype* top_diff = top[0]->cpu_diff();
  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
  for (int n = 0; n < num_; ++n) {
    col2im_nd_cpu(top_diff + n * top_dim_, num_spatial_axes_,
        bottom[0]->shape().data() + channel_axis_,
        top[0]->shape().data() + channel_axis_,
        kernel_shape_.cpu_data(), pad_.cpu_data(), stride_.cpu_data(),
        bottom_diff + n * bottom_dim_);
  }
}
Example 11
 inline void conv_col2im_cpu(const Dtype* col_buff, Dtype* data) {
   if (!force_nd_im2col_ && num_spatial_axes_ == 2) {
     col2im_cpu(col_buff, conv_in_channels_,
         conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2],
         kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1],
         pad_.cpu_data()[0], pad_.cpu_data()[1],
         stride_.cpu_data()[0], stride_.cpu_data()[1], data);
   } else {
     col2im_nd_cpu(col_buff, num_spatial_axes_, conv_input_shape_.cpu_data(),
         col_buffer_shape_.data(), kernel_shape_.cpu_data(),
         pad_.cpu_data(), stride_.cpu_data(), data);
   }
 }
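
Note the dispatch in this helper: the 2-D fast path unpacks scalar kernel, pad, and stride values from the parameter blobs, while everything else (3-D volumes, or force_nd_im2col_ set) goes through col2im_nd_cpu with the raw shape arrays. Example 15 below shows the same split on the Im2colLayer backward path, with dilation factors added.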
Example 12
void backward_local_layer(local_layer l, network_state state)
{
    int i, j;
    int locations = l.out_w*l.out_h;

    gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);

    for(i = 0; i < l.batch; ++i){
        axpy_cpu(l.outputs, 1, l.delta + i*l.outputs, 1, l.bias_updates, 1);
    }

    for(i = 0; i < l.batch; ++i){
        float *input = state.input + i*l.w*l.h*l.c;
        im2col_cpu(input, l.c, l.h, l.w, 
                l.size, l.stride, l.pad, l.col_image);

        for(j = 0; j < locations; ++j){ 
            float *a = l.delta + i*l.outputs + j;
            float *b = l.col_image + j;
            float *c = l.filter_updates + j*l.size*l.size*l.c*l.n;
            int m = l.n;
            int n = l.size*l.size*l.c;
            int k = 1;

            gemm(0,1,m,n,k,1,a,locations,b,locations,1,c,n);
        }

        if(state.delta){
            for(j = 0; j < locations; ++j){ 
                float *a = l.filters + j*l.size*l.size*l.c*l.n;
                float *b = l.delta + i*l.outputs + j;
                float *c = l.col_image + j;

                int m = l.size*l.size*l.c;
                int n = 1;
                int k = l.n;

                gemm(1,0,m,n,k,1,a,m,b,locations,0,c,locations);
            }

            col2im_cpu(l.col_image, l.c,  l.h,  l.w,  l.size,  l.stride, l.pad, state.delta+i*l.c*l.h*l.w);
        }
    }
}
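
Because a locally-connected layer has a distinct filter per output location, the loop above cannot amortize one large GEMM across locations: it issues a small GEMM per location into a strided slice of col_image, and only the final col2im_cpu over the assembled column gradient is shared with the ordinary convolution path.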
Example 13
Dtype DeConvolutionLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      vector<Blob<Dtype>*>* top) {

  const Dtype* bottom_data = bottom[0]->cpu_data();
  const Dtype* weight = this->blobs_[0]->cpu_data();
  Dtype* top_data = (*top)[0]->mutable_cpu_data();

  Dtype* col_data = col_buffer_.mutable_cpu_data();

  
  int weight_offset = M_ * K_;
  int col_offset = K_ * N_;
  int bottom_offset = M_ * N_;
  for (int n = 0; n < num_; ++n) {
      for (int g = 0; g < group_; ++g) {
        caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_,
          (Dtype)1., weight + weight_offset * g,
          bottom_data + bottom[0]->offset(n) + bottom_offset * g,
          (Dtype)0., col_data + col_offset * g);
      }
      // col2im forward to the top_data
      col2im_cpu(col_data, channels_, height_out_, width_out_, kernel_size_, pad_,
                 stride_, top_data + (*top)[0]->offset(n));
      // add bias
      if (bias_term_) {
          caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_output_,
                                N_, 1, (Dtype)1., this->blobs_[1]->cpu_data(),
                                reinterpret_cast<const Dtype*>(bias_multiplier_->cpu_data()),
                                (Dtype)1., top_data + (*top)[0]->offset(n));
      }
  }
  
  return Dtype(0.);
}
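
One detail worth noting in this forward pass: col2im_cpu targets height_out_ x width_out_, which for a deconvolution is the larger map. A hedged sketch of the usual transposed-convolution size relation (the helper name is illustrative; the layer precomputes these values during setup):

// A deconvolution's output is the input geometry of the matching convolution,
// i.e. the inverse of out = (in + 2*pad - kernel)/stride + 1.
static inline int deconv_out_dim(int in_dim, int kernel, int pad, int stride) {
    return stride * (in_dim - 1) + kernel - 2 * pad;
}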
Example 14
void backward_convolutional_layer(convolutional_layer l, network net)
{
    int i, j;
    int m = l.n/l.groups;
    int n = l.size*l.size*l.c/l.groups;
    int k = l.out_w*l.out_h;

    gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);

    if(l.batch_normalize){
        backward_batchnorm_layer(l, net);
    } else {
        backward_bias(l.bias_updates, l.delta, l.batch, l.n, k);
    }

    for(i = 0; i < l.batch; ++i){
        for(j = 0; j < l.groups; ++j){
            float *a = l.delta + (i*l.groups + j)*m*k;
            float *b = net.workspace;
            float *c = l.weight_updates + j*l.nweights/l.groups;

            float *im = net.input+(i*l.groups + j)*l.c/l.groups*l.h*l.w;

            im2col_cpu(im, l.c/l.groups, l.h, l.w, 
                    l.size, l.stride, l.pad, b);
            gemm(0,1,m,n,k,1,a,k,b,k,1,c,n);

            if(net.delta){
                a = l.weights + j*l.nweights/l.groups;
                b = l.delta + (i*l.groups + j)*m*k;
                c = net.workspace;

                gemm(1,0,n,k,m,1,a,n,b,k,0,c,k);

                col2im_cpu(net.workspace, l.c/l.groups, l.h, l.w, l.size, l.stride, 
                    l.pad, net.delta + (i*l.groups + j)*l.c/l.groups*l.h*l.w);
            }
        }
    }
}
Example 15
void Im2colLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  const Dtype* top_diff = top[0]->cpu_diff();
  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
  for (int n = 0; n < num_; ++n) {
    if (!force_nd_im2col_ && num_spatial_axes_ == 2) {
      col2im_cpu(top_diff + n * top_dim_, channels_,
          bottom[0]->shape(channel_axis_ + 1),
          bottom[0]->shape(channel_axis_ + 2),
          kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1],
          pad_.cpu_data()[0], pad_.cpu_data()[1],
          stride_.cpu_data()[0], stride_.cpu_data()[1],
          dilation_.cpu_data()[0], dilation_.cpu_data()[1],
          bottom_diff + n * bottom_dim_);
    } else {
      col2im_nd_cpu(top_diff + n * top_dim_, num_spatial_axes_,
          bottom[0]->shape().data() + channel_axis_,
          top[0]->shape().data() + channel_axis_,
          kernel_shape_.cpu_data(), pad_.cpu_data(), stride_.cpu_data(),
          dilation_.cpu_data(), bottom_diff + n * bottom_dim_);
    }
  }
}
Example 16
void
TiedConvolutionLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype> *> &top,
                                          const vector<bool> &propagate_down,
                                          vector<Blob<Dtype> *> *bottom) {
  //-----Same concept as Backward_cpu of convolutionlayer-----
  // but multiple times for each bottom-top pair, and accumulating dW
  const Dtype *weight = NULL;
  Dtype *weight_diff = NULL;
  if (this->param_propagate_down_[0]) {
    weight = this->blobs_[0]->cpu_data();
    weight_diff = this->blobs_[0]->mutable_cpu_diff();
    // Init weight diff to all 0s.
    caffe_set(this->blobs_[0]->count(), Dtype(0), weight_diff);
  }
  // bias gradient if necessary
  Dtype *bias_diff = NULL;
  if (bias_term_ && this->param_propagate_down_[1]) {
    bias_diff = this->blobs_[1]->mutable_cpu_diff();
    caffe_set(this->blobs_[1]->count(), Dtype(0), bias_diff);
  }

  const int weight_offset = M_ * K_;
  for (int i = 0; i < num_in_; ++i) {
    const Dtype *top_diff = NULL;
    // Bias gradient if necessary.
    if (bias_term_ && this->param_propagate_down_[1]) {
      top_diff = top[i]->cpu_diff();
      for (int n = 0; n < num_; ++n) {
        caffe_cpu_gemv<Dtype>(
            CblasNoTrans, num_output_, N_[i], 1., top_diff + top[i]->offset(n),
            reinterpret_cast<const Dtype *>(bias_multipliers_[i]->cpu_data()),
            1., bias_diff);
      }
    }
    if (this->param_propagate_down_[0] || propagate_down[i]) {
      if (!top_diff) {
        top_diff = top[i]->cpu_diff();
      }
      Dtype* col_data = this->col_buffers_[i]->mutable_cpu_data();
      const Dtype* bottom_data = (*bottom)[i]->cpu_data();
      Dtype* bottom_diff = (*bottom)[i]->mutable_cpu_diff();

      const int col_offset = K_ * N_[i];
      const int top_offset = M_ * N_[i];
      for (int n = 0; n < num_; ++n) {
        // Since we saved memory in the forward pass by not storing all col
        // data, we will need to recompute them.
        im2col_cpu(bottom_data + (*bottom)[i]->offset(n), channels_, height_[i],
                   width_[i], kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_,
                   stride_w_, col_data);
        // gradient w.r.t. weight. Note that we will accumulate diffs.
        // AJ: propagate error Delta W_ij = error from above * this_activation^T
        if (this->param_propagate_down_[0]) {
          for (int g = 0; g < group_; ++g) {
            caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_[i],
                                  (Dtype)1.,
                                  top_diff + top[i]->offset(n) + top_offset * g,
                                  col_data + col_offset * g, (Dtype)1.,
                                  weight_diff + weight_offset * g);
          }
        }
        // gradient w.r.t. bottom data, if necessary
        // AJ: error here = W*error from above
        if (propagate_down[i]) {
          if (weight == NULL) {
            weight = this->blobs_[0]->cpu_data();
          }
          for (int g = 0; g < group_; ++g) {
            caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_[i], M_,
                                  (Dtype)1., weight + weight_offset * g,
                                  top_diff + top[i]->offset(n) + top_offset * g,
                                  (Dtype)0., col_data + col_offset * g);
          }
          // col2im back to the data
          col2im_cpu(col_data, channels_, height_[i], width_[i], kernel_h_,
                     kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_,
                     bottom_diff + (*bottom)[i]->offset(n));
        }
      }
    }
    //---------------------------------------------------------
  }
}
Example 17
	void NonLocalLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
		const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom)
	{
		vector<bool> propagate_down_sub;
		propagate_down_sub.push_back(propagate_down[0]);
		propagate_down_sub.push_back(propagate_down[0]);
		if (propagate_down[0])
		{
			for (int i = 0; i < eltwise_bottom_vec.size(); i++)
				caffe_set(eltwise_bottom_vec[i]->count(), (Dtype)0, eltwise_bottom_vec[i]->mutable_cpu_diff());
			for (int i = 0; i < smooth_bottom_vec.size(); i++)
				caffe_set(smooth_bottom_vec[i]->count(), (Dtype)0, smooth_bottom_vec[i]->mutable_cpu_diff());
			for (int i = 0; i < euclidean_bottom_vec.size(); i++)
				caffe_set(euclidean_bottom_vec[i]->count(), (Dtype)0, euclidean_bottom_vec[i]->mutable_cpu_diff());
			for (int i = 0; i < split_1_bottom_vec.size(); i++)
				caffe_set(split_1_bottom_vec[i]->count(), (Dtype)0, split_1_bottom_vec[i]->mutable_cpu_diff());
			for (int i = 0; i < smooth_top_vec.size(); i++)
				caffe_set(smooth_top_vec[i]->count(), (Dtype)0, smooth_top_vec[i]->mutable_cpu_diff());
			for (int i = 0; i < split_0_top_vec.size(); i++)
				caffe_set(split_0_top_vec[i]->count(), (Dtype)0, split_0_top_vec[i]->mutable_cpu_diff());
			for (int i = 0; i < split_3_top_vec.size(); i++)
				caffe_set(split_3_top_vec[i]->count(), (Dtype)0, split_3_top_vec[i]->mutable_cpu_diff());
			for (int i = 0; i < normalize_top_vec.size(); i++)
				caffe_set(normalize_top_vec[i]->count(), (Dtype)0, normalize_top_vec[i]->mutable_cpu_diff());

			if (top.size() == 3)
				eltwise_layer->Backward(eltwise_top_vec, propagate_down_sub, eltwise_bottom_vec);

			split_layer_2->Backward(split_2_top_vec, propagate_down_sub, split_2_bottom_vec);
			const int tmp_offset = split_3_top_vec[0]->offset(1);
			const Dtype* split_2_bottom_diff = split_2_bottom_vec[0]->cpu_diff();
			Dtype* split_3_top_diff = split_3_top_vec[0]->mutable_cpu_diff();
			for (int n = 0; n < split_2_bottom_vec[0]->num(); ++n)
			{
				for (int ch = 0; ch < channels_; ++ch)
				{
					caffe_add(tmp_offset, split_3_top_diff, split_2_bottom_diff, split_3_top_diff);
					split_2_bottom_diff += tmp_offset;
				}
				split_3_top_diff += tmp_offset;
			}

			const int norm_offset = normalize_top_vec[0]->offset(1);
			Dtype* normalize_diff = normalize_top_vec[0]->mutable_cpu_diff();
			const Dtype* top_1_diff = top[1]->cpu_diff();
			for (int n = 0; n < normalize_top_vec[0]->num(); ++n)
			{
				for (int ch = 0; ch < channels_; ++ch)
				{
					caffe_add(tmp_offset, normalize_diff, top_1_diff, normalize_diff);
					top_1_diff += norm_offset;
				}
				normalize_diff += norm_offset;
			}
			normalize_layer->Backward(normalize_top_vec, propagate_down_sub, normalize_bottom_vec);
			split_3_top_vec[1]->ShareDiff(*normalize_bottom_vec[0]);
			split_layer_3->Backward(split_3_top_vec, propagate_down_sub, split_3_bottom_vec);
			smooth_threshold_layer->Backward(smooth_top_vec, propagate_down_sub, smooth_bottom_vec);

			caffe_scal(euclidean_top_vec[0]->count(),
				(Dtype)(1.0 / bottom[0]->channels()), euclidean_top_vec[0]->mutable_cpu_diff());

			euclidean_layer->Backward(euclidean_top_vec, propagate_down_sub, euclidean_bottom_vec);
			split_1_top_vec[1]->ShareDiff(*euclidean_bottom_vec[0]);
			split_layer_1->Backward(split_1_top_vec, propagate_down_sub, split_1_bottom_vec);

			for (int n = 0; n < num_; ++n)
			{
				col2im_center_cpu(img2col_1_top.cpu_diff() + img2col_1_top.offset(n), channels_, height_, width_,
					kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_,
					split_0_top_vec[1]->mutable_cpu_diff() + split_0_top_vec[1]->offset(n));

				col2im_cpu(img2col_0_top.cpu_diff() + img2col_0_top.offset(n), channels_, height_, width_,
					kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_,
					1, 1,  // dilation factors
					split_0_top_vec[0]->mutable_cpu_diff() + split_0_top_vec[0]->offset(n));
			}
			split_layer_0->Backward(split_0_top_vec, propagate_down_sub,bottom);
		}
	}
Example 18
 inline void conv_col2im_cpu(const Dtype* col_buff, Dtype* data) {
   col2im_cpu(col_buff, conv_in_channels_, conv_in_height_, conv_in_width_,
       kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_, data);
 }
Example 19
void LocalLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {

  const Dtype* top_diff = top[0]->cpu_diff();
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
  Dtype* x_data = col_buffer_.mutable_cpu_data();
  Dtype* x_diff = col_buffer_.mutable_cpu_diff();
  const Dtype* weight = this->blobs_[0]->cpu_data();
  Dtype* weight_diff = this->blobs_[0]->mutable_cpu_diff();
  Dtype* bias_diff = NULL;

  Blob<Dtype> intermediate;
  intermediate.Reshape(1, 1, 1, N_);

  Blob<Dtype> xt;
  xt.Reshape(1, 1, K_, N_);
  Dtype* xt_data = xt.mutable_cpu_data();

  if (bias_term_) {
    bias_diff = this->blobs_[1]->mutable_cpu_diff();
    memset(bias_diff, 0, sizeof(Dtype) * this->blobs_[1]->count());
    for (int n = 0; n < num_; ++n) {
      caffe_add(M_ * N_, bias_diff,
                top_diff + top[0]->offset(n),
                bias_diff);
    }
  }

  memset(weight_diff, 0, sizeof(Dtype) * this->blobs_[0]->count());
  for (int n=0; n<num_; n++) {
    im2col_cpu(bottom_data + bottom[0]->offset(n), channels_, height_,
               width_, kernel_size_, kernel_size_, pad_, pad_, stride_, stride_, x_data);

    // gradient wrt weight
    for (int m=0; m<num_output_; m++) {
      Dtype* filter_weight_diff = weight_diff+this->blobs_[0]->offset(m);
      for (int k=0; k<K_; k++) {
        caffe_mul(N_, top_diff+top[0]->offset(n, m),  
                  x_data+col_buffer_.offset(0,k), xt_data+xt.offset(0,0,k));
      }
      caffe_cpu_axpby(K_*N_, Dtype(1.0), xt_data, Dtype(1.0), filter_weight_diff);
    }
      
    // gradient wrt bottom data
    if (propagate_down[0]) {
      memset(x_diff, 0, col_buffer_.count() * sizeof(Dtype));
      for (int m=0; m<num_output_; m++) {
        for (int k=0; k<K_; k++) {
          caffe_mul(N_, top_diff+top[0]->offset(n, m),
                    weight+this->blobs_[0]->offset(m,0,k),
                    intermediate.mutable_cpu_data());

          caffe_cpu_axpby(N_, Dtype(1.0),
                          intermediate.cpu_data(), Dtype(1.0),
                          x_diff+col_buffer_.offset(0,k));
        }
      }

      // col2im back to the data
      col2im_cpu(x_diff, channels_, height_, width_, kernel_size_, kernel_size_,
                 pad_, pad_, stride_, stride_, bottom_diff + bottom[0]->offset(n));

    }
  }

}