// Expand one image (`data`, laid out conv_in_channels x H x W) into the
// column buffer `col_buff` using the dedicated 2-D im2col GPU kernel.
// conv_input_shape indices 1/2 are height/width; kernel_shape, pad and
// stride each hold {h, w} in their first two entries.
// NOTE(review): there is no `else` branch for the N-D case — when
// force_nd_im2col is set or num_spatial_axes != 2 this silently leaves
// `col_buff` untouched. Confirm callers can never reach that configuration
// (the sibling helper elsewhere in this file handles it via im2col_nd_gpu).
void conv_im2col_gpu(const Dtype* data, Dtype* col_buff){
  if (!force_nd_im2col&&num_spatial_axes == 2){
    im2col_gpu(data, conv_in_channels,
        conv_input_shape.cpu_data()[1], conv_input_shape.cpu_data()[2],
        kernel_shape.cpu_data()[0], kernel_shape.cpu_data()[1],
        pad.cpu_data()[0], pad.cpu_data()[1],
        stride.cpu_data()[0], stride.cpu_data()[1],
        col_buff);
  }
}
// Forward pass (GPU): run im2col independently on every image of the batch,
// writing each expanded column matrix into the matching slice of the top blob.
// Uses the layer's configured CHANNELS_/HEIGHT_/WIDTH_/KSIZE_/STRIDE_.
void Im2colLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    vector<Blob<Dtype>*>* top) {
  const Dtype* bottom_data = bottom[0]->gpu_data();
  Dtype* top_data = (*top)[0]->mutable_gpu_data();
  const int batch_size = bottom[0]->num();
  for (int image = 0; image < batch_size; ++image) {
    // Each image is expanded in place into its own region of top_data.
    im2col_gpu(bottom_data + bottom[0]->offset(image),
               CHANNELS_, HEIGHT_, WIDTH_, KSIZE_, STRIDE_,
               top_data + (*top)[0]->offset(image));
  }
}
inline void conv_im2col_gpu(const Dtype* data, Dtype* col_buff) { if (!force_nd_im2col_ && num_spatial_axes_ == 2) { im2col_gpu(data, conv_in_channels_, conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2], kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1], pad_.cpu_data()[0], pad_.cpu_data()[1], stride_.cpu_data()[0], stride_.cpu_data()[1], col_buff); } else { im2col_nd_gpu(data, num_spatial_axes_, num_kernels_im2col_, conv_input_shape_.gpu_data(), col_buffer_.gpu_shape(), kernel_shape_.gpu_data(), pad_.gpu_data(), stride_.gpu_data(), col_buff); } }
// Forward pass (GPU) for a convolution whose single weight blob is shared
// ("tied") across num_in_ inputs of possibly different spatial sizes.
// For each input i and each image n: (1) im2col the image into that input's
// column buffer, (2) one GEMM per group multiplying the shared weights by
// the column matrix, (3) optionally broadcast-add the bias via a rank-1 GEMM
// against the per-input bias multiplier vector.
void TiedConvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype> *> &bottom,
    vector<Blob<Dtype> *> *top) {
  // Shared weights: one M_ x K_ slab per group.
  const Dtype *weight = this->blobs_[0]->gpu_data();
  const int weight_offset = M_ * K_;
  for (int i = 0; i < num_in_; ++i) {
    //-----Same concept as Forward_gpu of convolutionlayer-----
    const Dtype *bottom_data = bottom[i]->gpu_data();
    // Per-input offsets: N_[i] is the number of output spatial locations for
    // input i, so the col/top strides differ across inputs.
    const int col_offset = K_ * N_[i];
    const int top_offset = M_ * N_[i];
    Dtype *top_data = (*top)[i]->mutable_gpu_data();
    Dtype *col_data = this->col_buffers_[i]->mutable_gpu_data();
    for (int n = 0; n < num_; ++n) {
      // First, im2col
      im2col_gpu(bottom_data + bottom[i]->offset(n), channels_, height_[i],
                 width_[i], kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_,
                 stride_w_, col_data);
      // Second, innerproduct with groups.
      for (int g = 0; g < group_; ++g) {
        // top[g] = weight[g] * col[g]  (beta = 0 overwrites the output slab).
        caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_[i], K_,
                              (Dtype)1., weight + weight_offset * g,
                              col_data + col_offset * g, (Dtype)0.,
                              top_data + (*top)[i]->offset(n) + top_offset * g);
      }
      // third, add bias
      if (bias_term_) {
        // Outer product of the bias vector with a ones vector (bias
        // multiplier), accumulated into the output (beta = 1).
        caffe_gpu_gemm<Dtype>(
            CblasNoTrans, CblasNoTrans, num_output_, N_[i], 1, (Dtype)1.,
            this->blobs_[1]->gpu_data(),
            reinterpret_cast<const Dtype *>(bias_multipliers_[i]->gpu_data()),
            (Dtype)1., top_data + (*top)[i]->offset(n));
      }
    }
    //---------------------------------------------------------
  }
  // montage(this->blobs_[0].get(), "tconv" +
  // boost::lexical_cast<std::string>(M_));
}
// Thin wrapper over this project's extended im2col_gpu overload: expands one
// image into `col_buff` using the layer's cached 2-D geometry. The extra
// bottom_offset_ argument and the trailing 0 are passed straight through to
// the kernel (their semantics are defined by that overload).
inline void conv_im2col_gpu(const Dtype* data, Dtype* col_buff) {
  im2col_gpu(data, bottom_offset_, conv_in_channels_,
             conv_in_height_, conv_in_width_,
             kernel_h_, kernel_w_,
             pad_h_, pad_w_,
             stride_h_, stride_w_,
             col_buff, 0);
}
// Expand one image into `col_buff` with the dilated-convolution variant of
// im2col_gpu: in addition to kernel size, padding and stride, it forwards the
// layer's filter_stride_h_/filter_stride_w_ (filter dilation) parameters.
inline void conv_im2col_gpu(const Dtype* data, Dtype* col_buff) {
  im2col_gpu(data, conv_in_channels_,
             conv_in_height_, conv_in_width_,
             kernel_h_, kernel_w_,
             pad_h_, pad_w_,
             stride_h_, stride_w_,
             filter_stride_h_, filter_stride_w_,
             col_buff);
}
// Backward pass (GPU) for the tied convolution. Weight/bias diffs are zeroed
// up front and then ACCUMULATED across all num_in_ inputs and all num_ images
// (beta = 1 in the weight GEMM / bias GEMV), since one weight blob serves
// every input. For each image: re-run im2col (col data was not cached in the
// forward pass to save memory), accumulate the weight gradient, and, if
// needed, compute the bottom gradient via weight^T * top_diff + col2im.
void TiedConvolutionLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype> *> &top,
    const vector<bool> &propagate_down, vector<Blob<Dtype> *> *bottom) {
  const Dtype *weight = NULL;
  Dtype *weight_diff = NULL;
  if (this->param_propagate_down_[0]) {
    weight = this->blobs_[0]->gpu_data();
    weight_diff = this->blobs_[0]->mutable_gpu_diff();
    // Init weight diffs to all 0s.
    caffe_gpu_set(this->blobs_[0]->count(), Dtype(0), weight_diff);
  }
  Dtype *bias_diff = NULL;
  if (bias_term_ && this->param_propagate_down_[1]) {
    bias_diff = this->blobs_[1]->mutable_gpu_diff();
    caffe_gpu_set(this->blobs_[1]->count(), Dtype(0), bias_diff);
  }
  const int weight_offset = M_ * K_;
  for (int i = 0; i < num_in_; ++i) {
    //-----Same concept as Backward_cpu of convolutionlayer-----
    const Dtype* top_diff = NULL;
    // Bias gradient if necessary
    if (bias_term_ && this->param_propagate_down_[1]) {
      top_diff = top[i]->gpu_diff();
      for (int n = 0; n < num_; ++n) {
        // bias_diff += top_diff(n) * bias_multiplier (beta = 1 accumulates
        // over images and over inputs).
        caffe_gpu_gemv<Dtype>(
            CblasNoTrans, num_output_, N_[i], 1., top_diff + top[i]->offset(n),
            reinterpret_cast<const Dtype *>(bias_multipliers_[i]->gpu_data()),
            1., bias_diff);
      }
    }
    if (this->param_propagate_down_[0] || propagate_down[i]) {
      // top_diff may already be set by the bias branch above; fetch lazily.
      if (!top_diff) {
        top_diff = top[i]->gpu_diff();
      }
      Dtype* col_data = this->col_buffers_[i]->mutable_gpu_data();
      const Dtype* bottom_data = (*bottom)[i]->gpu_data();
      Dtype* bottom_diff = (*bottom)[i]->mutable_gpu_diff();
      // Per-input offsets: N_[i] output locations for input i.
      const int col_offset = K_ * N_[i];
      const int top_offset = M_ * N_[i];
      for (int n = 0; n < num_; ++n) {
        // Since we saved memory in the forward pass by not storing all col data,
        // we will need to recompute them.
        im2col_gpu(bottom_data + (*bottom)[i]->offset(n), channels_, height_[i],
                   width_[i], kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_,
                   stride_w_, col_data);
        // gradient w.r.t. weight. Note that we will accumulate diffs.
        if (this->param_propagate_down_[0]) {
          for (int g = 0; g < group_; ++g) {
            // weight_diff[g] += top_diff(n)[g] * col[g]^T (beta = 1).
            caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_[i],
                                  (Dtype)1.,
                                  top_diff + top[i]->offset(n) + top_offset * g,
                                  col_data + col_offset * g, (Dtype)1.,
                                  weight_diff + weight_offset * g);
          }
        }
        // gradient w.r.t. bottom data, if necessary
        if (propagate_down[i]) {
          // weight may be unset when param_propagate_down_[0] was false.
          if (weight == NULL) {
            weight = this->blobs_[0]->gpu_data();
          }
          for (int g = 0; g < group_; ++g) {
            // col[g] = weight[g]^T * top_diff(n)[g]; beta = 0 is safe because
            // col_data is fully overwritten before the col2im below.
            caffe_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_[i], M_,
                                  (Dtype)1., weight + weight_offset * g,
                                  top_diff + top[i]->offset(n) + top_offset * g,
                                  (Dtype)0., col_data + col_offset * g);
          }
          // col2im back to the data
          col2im_gpu(col_data, channels_, height_[i], width_[i], kernel_h_,
                     kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_,
                     bottom_diff + (*bottom)[i]->offset(n));
        }
      }
    }
    // montage_channels(this->blobs_[0].get(),
    // boost::lexical_cast<std::string>(M_) + " tconv bprop " +
    // boost::lexical_cast<std::string>(i) , true);
    //// make sure to give back the pointer to gpu after visualization
    // weight_diff = this->blobs_[0]->mutable_gpu_diff();
  }  // end for each input
  // montage_channels(this->blobs_[0].get(), "final tconv bprop " +
  // boost::lexical_cast<std::string>(M_), true);
  // cv::waitKey(0);
}