void conv_col2im_gpu(const Dtype* col_buff, Dtype* data){ if (!force_nd_im2col&&num_spatial_axes == 2){ col2im_gpu(col_buff, conv_in_channels, conv_input_shape.cpu_data()[1], conv_input_shape.cpu_data()[2], kernel_shape.cpu_data()[0], kernel_shape.cpu_data()[1], pad.cpu_data()[0], pad.cpu_data()[1], stride.cpu_data()[0], stride.cpu_data()[1], data); } }
// Backward pass of the Im2col layer (legacy Caffe interface: a single
// `propagate_down` flag, `bottom` passed as a mutable pointer).
// Each image's column-buffer gradient in top[0] is folded back into the
// image-shaped gradient of (*bottom)[0] via col2im, which accumulates the
// gradients of overlapping patches.
// Returns Dtype(0.) — this layer contributes no loss of its own.
Dtype Im2colLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
  // Respect the caller's request: the old code ignored `propagate_down`
  // and unconditionally recomputed and overwrote bottom_diff.
  if (!propagate_down) {
    return Dtype(0.);
  }
  const Dtype* top_diff = top[0]->gpu_diff();
  Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff();
  for (int n = 0; n < top[0]->num(); ++n) {
    // Per-image col2im; offset(n) locates image n inside the batched blob.
    col2im_gpu(top_diff + top[0]->offset(n), CHANNELS_, HEIGHT_, WIDTH_,
        KSIZE_, STRIDE_, bottom_diff + (*bottom)[0]->offset(n));
  }
  return Dtype(0.);
}
inline void conv_col2im_gpu(const Dtype* col_buff, Dtype* data) { if (!force_nd_im2col_ && num_spatial_axes_ == 2) { col2im_gpu(col_buff, conv_in_channels_, conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2], kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1], pad_.cpu_data()[0], pad_.cpu_data()[1], stride_.cpu_data()[0], stride_.cpu_data()[1], data); } else { col2im_nd_gpu(col_buff, num_spatial_axes_, num_kernels_col2im_, conv_input_shape_.gpu_data(), col_buffer_.gpu_shape(), kernel_shape_.gpu_data(), pad_.gpu_data(), stride_.gpu_data(), data); } }
// col2im wrapper for the backward pass: scatters the column buffer back
// into the input gradient `data`.
// NOTE(review): this project's col2im_gpu overload takes an extra leading
// argument (0 here) and a trailing bottom_offset_ — their exact semantics
// are defined by that kernel's declaration; confirm there.
inline void conv_col2im_gpu(const Dtype* col_buff, Dtype* data) {
  col2im_gpu(col_buff, 0,
             conv_in_channels_, conv_in_height_, conv_in_width_,
             kernel_h_, kernel_w_,
             pad_h_, pad_w_,
             stride_h_, stride_w_,
             data, bottom_offset_);
}
// col2im wrapper for the backward pass of this filter-strided convolution
// variant: folds the column buffer back into the input gradient `data`.
// NOTE(review): filter_stride_h_/filter_stride_w_ are forwarded verbatim;
// their exact meaning (likely a dilation-style intra-kernel stride) is
// defined by this project's col2im_gpu overload — confirm against it.
inline void conv_col2im_gpu(const Dtype* col_buff, Dtype* data) {
  col2im_gpu(col_buff,
             conv_in_channels_, conv_in_height_, conv_in_width_,
             kernel_h_, kernel_w_,
             pad_h_, pad_w_,
             stride_h_, stride_w_,
             filter_stride_h_, filter_stride_w_,
             data);
}
// Backward pass for a tied (weight-shared) convolution layer.
// One weight blob (this->blobs_[0]) is shared by num_in_ bottom/top pairs,
// so parameter diffs are zeroed once up front and then accumulated across
// every input pair and every image in the batch.
// Legacy Caffe interface: top diffs come in via `top`, input gradients are
// written into `*bottom`, and propagate_down[i] gates each input's data
// gradient.
void TiedConvolutionLayer<Dtype>::Backward_gpu(
    const vector<Blob<Dtype> *> &top, const vector<bool> &propagate_down,
    vector<Blob<Dtype> *> *bottom) {
  const Dtype *weight = NULL;
  Dtype *weight_diff = NULL;
  if (this->param_propagate_down_[0]) {
    weight = this->blobs_[0]->gpu_data();
    weight_diff = this->blobs_[0]->mutable_gpu_diff();
    // Init weight diffs to all 0s.  (They are accumulated with beta = 1
    // across all inputs and images below, so start from a clean slate.)
    caffe_gpu_set(this->blobs_[0]->count(), Dtype(0), weight_diff);
  }
  Dtype *bias_diff = NULL;
  if (bias_term_ && this->param_propagate_down_[1]) {
    bias_diff = this->blobs_[1]->mutable_gpu_diff();
    // Bias diffs are likewise accumulated across inputs; zero them first.
    caffe_gpu_set(this->blobs_[1]->count(), Dtype(0), bias_diff);
  }
  // Stride between consecutive groups' weight blocks (used as
  // weight_diff + weight_offset * g below).
  const int weight_offset = M_ * K_;
  for (int i = 0; i < num_in_; ++i) {
    //-----Same concept as Backward_cpu of convolutionlayer-----
    const Dtype* top_diff = NULL;
    // Bias gradient if necessary: a gemv of the (num_output_ x N_[i])
    // top-diff matrix against the bias multiplier vector (presumably
    // all-ones, per the usual Caffe pattern — confirm in LayerSetUp),
    // accumulated (beta = 1) into bias_diff for each image n.
    if (bias_term_ && this->param_propagate_down_[1]) {
      top_diff = top[i]->gpu_diff();
      for (int n = 0; n < num_; ++n) {
        caffe_gpu_gemv<Dtype>(
            CblasNoTrans, num_output_, N_[i], 1.,
            top_diff + top[i]->offset(n),
            reinterpret_cast<const Dtype *>(bias_multipliers_[i]->gpu_data()),
            1., bias_diff);
      }
    }
    if (this->param_propagate_down_[0] || propagate_down[i]) {
      // top_diff may already have been fetched by the bias branch above.
      if (!top_diff) {
        top_diff = top[i]->gpu_diff();
      }
      Dtype* col_data = this->col_buffers_[i]->mutable_gpu_data();
      const Dtype* bottom_data = (*bottom)[i]->gpu_data();
      Dtype* bottom_diff = (*bottom)[i]->mutable_gpu_diff();
      // Per-group offsets into the column buffer and the top blob.
      const int col_offset = K_ * N_[i];
      const int top_offset = M_ * N_[i];
      for (int n = 0; n < num_; ++n) {
        // Since we saved memory in the forward pass by not storing all col
        // data, we will need to recompute them.
        im2col_gpu(bottom_data + (*bottom)[i]->offset(n), channels_,
                   height_[i], width_[i], kernel_h_, kernel_w_, pad_h_,
                   pad_w_, stride_h_, stride_w_, col_data);
        // gradient w.r.t. weight. Note that we will accumulate diffs
        // (beta = 1) across groups, images, and tied inputs.
        if (this->param_propagate_down_[0]) {
          for (int g = 0; g < group_; ++g) {
            caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_[i],
                (Dtype)1., top_diff + top[i]->offset(n) + top_offset * g,
                col_data + col_offset * g, (Dtype)1.,
                weight_diff + weight_offset * g);
          }
        }
        // gradient w.r.t. bottom data, if necessary
        if (propagate_down[i]) {
          // weight was only fetched above when param_propagate_down_[0];
          // fetch it lazily here if the data gradient still needs it.
          if (weight == NULL) {
            weight = this->blobs_[0]->gpu_data();
          }
          for (int g = 0; g < group_; ++g) {
            // col_data is reused as scratch for the column-space gradient:
            // beta = 0 overwrites the im2col result computed above (the
            // weight gradient already consumed it).
            caffe_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_[i], M_,
                (Dtype)1., weight + weight_offset * g,
                top_diff + top[i]->offset(n) + top_offset * g,
                (Dtype)0., col_data + col_offset * g);
          }
          // col2im back to the data
          col2im_gpu(col_data, channels_, height_[i], width_[i], kernel_h_,
                     kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_,
                     bottom_diff + (*bottom)[i]->offset(n));
        }
      }
    }
    // montage_channels(this->blobs_[0].get(),
    //     boost::lexical_cast<std::string>(M_) + " tconv bprop " +
    //     boost::lexical_cast<std::string>(i), true);
    //// make sure to give back the pointer to gpu after visualization
    // weight_diff = this->blobs_[0]->mutable_gpu_diff();
  }  // end for each input
  // montage_channels(this->blobs_[0].get(), "final tconv bprop " +
  //     boost::lexical_cast<std::string>(M_), true);
  // cv::waitKey(0);
}