예제 #1
0
	// Expands `data` into `col_buff` with im2col_gpu, but only when the layer
	// has exactly two spatial axes and the N-d path is not being forced.
	// In every other configuration this function leaves `col_buff` untouched.
	void conv_im2col_gpu(const Dtype* data, Dtype* col_buff){
		// Guard: nothing is done unless the plain 2-D case applies.
		if (force_nd_im2col || num_spatial_axes != 2) {
			return;
		}
		// Shape parameters are read from the host-side copies of the blobs.
		// Input shape entries [1] and [2] are forwarded right after the channel
		// count (presumably height and width -- confirm against im2col_gpu).
		im2col_gpu(data,
			conv_in_channels,
			conv_input_shape.cpu_data()[1],
			conv_input_shape.cpu_data()[2],
			kernel_shape.cpu_data()[0],
			kernel_shape.cpu_data()[1],
			pad.cpu_data()[0],
			pad.cpu_data()[1],
			stride.cpu_data()[0],
			stride.cpu_data()[1],
			col_buff);
	}
// GPU forward pass: applies im2col_gpu to each item of bottom[0] in turn and
// writes the result into the matching item of (*top)[0].
void Im2colLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
		vector<Blob<Dtype>*>* top) {
	Blob<Dtype>* const input = bottom[0];
	Blob<Dtype>* const output = (*top)[0];
	const Dtype* src = input->gpu_data();
	Dtype* dst = output->mutable_gpu_data();
	// offset(idx) locates item `idx` inside each blob's buffer.
	for (int idx = 0; idx < input->num(); ++idx) {
		const Dtype* src_item = src + input->offset(idx);
		Dtype* dst_item = dst + output->offset(idx);
		im2col_gpu(src_item, CHANNELS_, HEIGHT_, WIDTH_, KSIZE_, STRIDE_,
				dst_item);
	}
}
예제 #3
0
 // Expands `data` into `col_buff`, choosing between the N-d routine and the
 // specialised 2-D routine.
 //  - N-d path: taken when it is forced or the spatial axis count is not 2;
 //    shape information is passed as device pointers (gpu_data / gpu_shape).
 //  - 2-D path: shape information is read from the host copies (cpu_data).
 inline void conv_im2col_gpu(const Dtype* data, Dtype* col_buff) {
   if (force_nd_im2col_ || num_spatial_axes_ != 2) {
     im2col_nd_gpu(data,
         num_spatial_axes_,
         num_kernels_im2col_,
         conv_input_shape_.gpu_data(),
         col_buffer_.gpu_shape(),
         kernel_shape_.gpu_data(),
         pad_.gpu_data(),
         stride_.gpu_data(),
         col_buff);
     return;
   }
   // Plain 2-D case: input shape entries [1] and [2] follow the channel count.
   im2col_gpu(data,
       conv_in_channels_,
       conv_input_shape_.cpu_data()[1],
       conv_input_shape_.cpu_data()[2],
       kernel_shape_.cpu_data()[0],
       kernel_shape_.cpu_data()[1],
       pad_.cpu_data()[0],
       pad_.cpu_data()[1],
       stride_.cpu_data()[0],
       stride_.cpu_data()[1],
       col_buff);
 }
예제 #4
0
// GPU forward pass of the tied convolution layer.
//
// Every bottom blob is convolved with the same weight blob (blobs_[0]).
// For each input i and each sample n the steps are:
//   1. im2col the sample into the per-input column buffer,
//   2. one GEMM per group: weights x columns -> top (overwrites top),
//   3. if a bias term exists, a rank-1 GEMM adds the bias on top.
void
TiedConvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype> *> &bottom,
                                         vector<Blob<Dtype> *> *top) {
  const Dtype one = static_cast<Dtype>(1.);
  const Dtype zero = static_cast<Dtype>(0.);

  // The shared filter bank; consecutive groups are M_ * K_ elements apart.
  const Dtype *filters = this->blobs_[0]->gpu_data();
  const int filter_group_stride = M_ * K_;

  for (int src_idx = 0; src_idx < num_in_; ++src_idx) {
    // Same scheme as Forward_gpu of the regular convolution layer.
    const Dtype *input = bottom[src_idx]->gpu_data();
    const int col_group_stride = K_ * N_[src_idx];
    const int out_group_stride = M_ * N_[src_idx];
    Dtype *output = (*top)[src_idx]->mutable_gpu_data();
    Dtype *columns = this->col_buffers_[src_idx]->mutable_gpu_data();

    for (int sample = 0; sample < num_; ++sample) {
      Dtype *out_sample = output + (*top)[src_idx]->offset(sample);

      // Step 1: unroll the current sample into the column buffer.
      im2col_gpu(input + bottom[src_idx]->offset(sample), channels_,
                 height_[src_idx], width_[src_idx], kernel_h_, kernel_w_,
                 pad_h_, pad_w_, stride_h_, stride_w_, columns);

      // Step 2: per-group inner product; beta == 0 so the output is replaced.
      for (int grp = 0; grp < group_; ++grp) {
        caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_[src_idx], K_,
                              one, filters + filter_group_stride * grp,
                              columns + col_group_stride * grp, zero,
                              out_sample + out_group_stride * grp);
      }

      // Step 3: accumulate the bias (beta == 1 keeps the product from step 2).
      if (bias_term_) {
        const Dtype *ones_row = reinterpret_cast<const Dtype *>(
            bias_multipliers_[src_idx]->gpu_data());
        caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_output_,
                              N_[src_idx], 1, one, this->blobs_[1]->gpu_data(),
                              ones_row, one, out_sample);
      }
    }
  }
  // Debug visualisation (disabled):
  // montage(this->blobs_[0].get(), "tconv" +
  // boost::lexical_cast<std::string>(M_));
}
예제 #5
0
 // Thin wrapper: forwards the layer's stored convolution geometry to
 // im2col_gpu so callers only supply the source and destination buffers.
 // NOTE(review): this im2col_gpu overload also takes bottom_offset_ after
 // `data` and a trailing literal 0 after `col_buff` (presumably buffer
 // offsets -- confirm against the im2col_gpu declaration).
 inline void conv_im2col_gpu(const Dtype* data, Dtype* col_buff) {
   im2col_gpu(data, bottom_offset_,
       conv_in_channels_, conv_in_height_, conv_in_width_,
       kernel_h_, kernel_w_,
       pad_h_, pad_w_,
       stride_h_, stride_w_,
       col_buff, 0);
 }
예제 #6
0
 // Thin wrapper: forwards the layer's stored convolution geometry -- input
 // dimensions, kernel, padding, stride and filter stride -- to im2col_gpu so
 // callers only supply the source and destination buffers.
 inline void conv_im2col_gpu(const Dtype* data, Dtype* col_buff) {
   im2col_gpu(data,
       conv_in_channels_, conv_in_height_, conv_in_width_,
       kernel_h_, kernel_w_,
       pad_h_, pad_w_,
       stride_h_, stride_w_,
       filter_stride_h_, filter_stride_w_,
       col_buff);
 }
예제 #7
0
// GPU backward pass of the tied convolution layer.
//
// The weight and bias blobs are shared by all inputs, so their diffs are
// zeroed once up front and then accumulated over every input and sample.
// For each input i:
//   - bias gradient (if enabled): gemv of the top diff with the bias
//     multiplier, accumulated into the bias diff;
//   - weight gradient (if enabled): im2col is recomputed per sample and a
//     per-group GEMM accumulates into the weight diff;
//   - bottom gradient (if propagate_down[i]): per-group GEMM of W^T with the
//     top diff into the column buffer, followed by col2im into bottom diff.
void
TiedConvolutionLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype> *> &top,
                                          const vector<bool> &propagate_down,
                                          vector<Blob<Dtype> *> *bottom) {
  const Dtype one = static_cast<Dtype>(1.);
  const Dtype zero = static_cast<Dtype>(0.);

  const Dtype *filters = NULL;
  Dtype *filters_grad = NULL;
  if (this->param_propagate_down_[0]) {
    filters = this->blobs_[0]->gpu_data();
    filters_grad = this->blobs_[0]->mutable_gpu_diff();
    // Start the accumulated weight gradient from zero.
    caffe_gpu_set(this->blobs_[0]->count(), Dtype(0), filters_grad);
  }

  Dtype *bias_grad = NULL;
  if (bias_term_ && this->param_propagate_down_[1]) {
    bias_grad = this->blobs_[1]->mutable_gpu_diff();
    caffe_gpu_set(this->blobs_[1]->count(), Dtype(0), bias_grad);
  }

  const int filter_group_stride = M_ * K_;
  for (int src_idx = 0; src_idx < num_in_; ++src_idx) {
    // Same scheme as the backward pass of the regular convolution layer.
    const Dtype *out_grad = NULL;

    // --- Bias gradient, accumulated over all samples of this input. ---
    if (bias_term_ && this->param_propagate_down_[1]) {
      out_grad = top[src_idx]->gpu_diff();
      for (int sample = 0; sample < num_; ++sample) {
        const Dtype *ones_row = reinterpret_cast<const Dtype *>(
            bias_multipliers_[src_idx]->gpu_data());
        caffe_gpu_gemv<Dtype>(CblasNoTrans, num_output_, N_[src_idx], 1.,
                              out_grad + top[src_idx]->offset(sample),
                              ones_row, 1., bias_grad);
      }
    }

    // --- Weight and/or bottom gradient. ---
    if (this->param_propagate_down_[0] || propagate_down[src_idx]) {
      // The top diff may already have been fetched for the bias gradient.
      if (!out_grad) {
        out_grad = top[src_idx]->gpu_diff();
      }
      Dtype *columns = this->col_buffers_[src_idx]->mutable_gpu_data();
      const Dtype *input = (*bottom)[src_idx]->gpu_data();
      Dtype *input_grad = (*bottom)[src_idx]->mutable_gpu_diff();

      const int col_group_stride = K_ * N_[src_idx];
      const int out_group_stride = M_ * N_[src_idx];

      for (int sample = 0; sample < num_; ++sample) {
        // The forward pass did not keep the column data for every sample
        // (to save memory), so it is rebuilt here.
        im2col_gpu(input + (*bottom)[src_idx]->offset(sample), channels_,
                   height_[src_idx], width_[src_idx], kernel_h_, kernel_w_,
                   pad_h_, pad_w_, stride_h_, stride_w_, columns);

        // Gradient w.r.t. the weights; beta == 1 accumulates the diffs.
        if (this->param_propagate_down_[0]) {
          for (int grp = 0; grp < group_; ++grp) {
            caffe_gpu_gemm<Dtype>(
                CblasNoTrans, CblasTrans, M_, K_, N_[src_idx], one,
                out_grad + top[src_idx]->offset(sample) +
                    out_group_stride * grp,
                columns + col_group_stride * grp, one,
                filters_grad + filter_group_stride * grp);
          }
        }

        // Gradient w.r.t. the bottom data, only if requested for this input.
        if (propagate_down[src_idx]) {
          // Weights are fetched lazily when the weight gradient was skipped.
          if (filters == NULL) {
            filters = this->blobs_[0]->gpu_data();
          }
          // beta == 0: the column buffer is overwritten with W^T * top_diff.
          for (int grp = 0; grp < group_; ++grp) {
            caffe_gpu_gemm<Dtype>(
                CblasTrans, CblasNoTrans, K_, N_[src_idx], M_, one,
                filters + filter_group_stride * grp,
                out_grad + top[src_idx]->offset(sample) +
                    out_group_stride * grp,
                zero, columns + col_group_stride * grp);
          }
          // Fold the column buffer back into image layout.
          col2im_gpu(columns, channels_, height_[src_idx], width_[src_idx],
                     kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_,
                     stride_w_,
                     input_grad + (*bottom)[src_idx]->offset(sample));
        }
      }
    }
    // Debug visualisation (disabled):
    // montage_channels(this->blobs_[0].get(),
    // boost::lexical_cast<std::string>(M_) + " tconv bprop " +
    // boost::lexical_cast<std::string>(i) , true);
    //// make sure to give back the pointer to gpu after visualization
    // weight_diff = this->blobs_[0]->mutable_gpu_diff();
  }  // end for each input
  // Debug visualisation (disabled):
  // montage_channels(this->blobs_[0].get(), "final tconv bprop " +
  // boost::lexical_cast<std::string>(M_), true);
  // cv::waitKey(0);
}