// GPU backward pass helper: runs one GEMM per convolution group with the
// weights operand transposed (CblasTrans), writing the result into a column
// buffer, then folds that buffer into `input` via conv_col2im_gpu.
//
// output  - read-only GEMM operand, advanced by output_offset_ per group.
// weights - read-only GEMM operand, advanced by weight_offset_ per group.
// input   - destination; written directly by the GEMM in the 1x1 case,
//           otherwise written by conv_col2im_gpu.
void BaseConvolutionLayer<Dtype>::backward_gpu_gemm(const Dtype* output,
    const Dtype* weights, Dtype* input) {
  // The scratch buffer is requested unconditionally, exactly as before, even
  // though the 1x1 path does not use it.
  Dtype* const scratch = col_buffer_.mutable_gpu_data();
  // For 1x1 kernels the GEMM writes straight into `input`.
  Dtype* const col_data = is_1x1_ ? input : scratch;

  // NOTE(review): the row count passed here is kernel_dim_ (not divided by
  // group_), unlike the other backward_gpu_gemm overloads in this file.
  // Presumably kernel_dim_ is already per-group in this version - confirm.
  for (int grp = 0; grp < group_; ++grp) {
    const Dtype* const group_weights = weights + weight_offset_ * grp;
    const Dtype* const group_output = output + output_offset_ * grp;
    Dtype* const group_col = col_data + col_offset_ * grp;
    // alpha = 1, beta = 0.
    caffe_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans,
        kernel_dim_, conv_out_spatial_dim_, conv_out_channels_ / group_,
        static_cast<Dtype>(1.), group_weights, group_output,
        static_cast<Dtype>(0.), group_col);
  }

  if (is_1x1_) {
    // Result is already in `input`; no col2im step needed.
    return;
  }
  conv_col2im_gpu(col_data, input);
}
// GPU backward pass helper for the N-D convolution layer: runs one GEMM per
// group with the weights operand transposed (CblasTrans) into a column
// buffer, then folds that buffer into `input` via conv_col2im_gpu.
//
// output  - read-only GEMM operand, advanced by output_offset_ per group.
// weights - read-only GEMM operand, advanced by weight_offset_ per group.
// input   - destination; written directly by the GEMM in the 1x1 case,
//           otherwise written by conv_col2im_gpu.
//
// NOTE(review): only the CUDA backend is handled. For any other backend, or
// when built without USE_CUDA, this function returns without computing
// anything - confirm that this is intended.
void BaseConvolutionNDLayer<Dtype>::backward_gpu_gemm(const Dtype* output,
    const Dtype* weights, Dtype* input) {
  // The scratch buffer is requested before the backend check, exactly as
  // before, so the call happens on every path.
  Dtype* const scratch = col_buffer_.mutable_gpu_data();
  // For 1x1 kernels the GEMM writes straight into `input`.
  Dtype* const col_data = is_1x1_ ? input : scratch;

  if (this->device_context_->backend() != BACKEND_CUDA) {
    return;
  }

#ifdef USE_CUDA
  for (int grp = 0; grp < group_; ++grp) {
    const Dtype* const group_weights = weights + weight_offset_ * grp;
    const Dtype* const group_output = output + output_offset_ * grp;
    Dtype* const group_col = col_data + col_offset_ * grp;
    // alpha = 1, beta = 0.
    caffe_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans,
        kernel_dim_ / group_, conv_out_spatial_dim_,
        conv_out_channels_ / group_,
        static_cast<Dtype>(1.), group_weights, group_output,
        static_cast<Dtype>(0.), group_col);
  }
  if (!is_1x1_) {
    conv_col2im_gpu(col_data, input);
  }
#endif  // USE_CUDA
}
// GPU backward pass helper with explicit element offsets: runs one GEMM per
// group with the weights operand transposed (CblasTrans) into a column
// buffer, then folds that buffer into `input` at `input_off` via the
// backend's col2im routine. Dispatches on the device backend: CUDA uses raw
// pointer arithmetic, the other branch (GreenTea) passes buffer handles plus
// integer offsets.
//
// output     - read-only GEMM operand.
// output_off - element offset applied to `output`.
// weights    - read-only GEMM operand, advanced by weight_offset_ per group.
// input      - destination; written directly by the GEMM in the 1x1 case,
//              otherwise written by the col2im step.
// input_off  - element offset applied to `input`.
void BaseConvolutionLayer<Dtype>::backward_gpu_gemm(const Dtype* output,
    const int output_off, const Dtype* weights, Dtype* input,
    const int input_off) {
  // The scratch buffer is requested unconditionally, exactly as before.
  Dtype* const scratch = col_buffer()->mutable_gpu_data();
  // For 1x1 kernels the GEMM writes straight into `input`.
  Dtype* const col_data = is_1x1_ ? input : scratch;
  // Only the 1x1 path needs the input offset on the GEMM destination; the
  // scratch buffer is always addressed from its start.
  const int col_shift = is_1x1_ ? input_off : 0;

  if (this->device_context_->backend() == BACKEND_CUDA) {
#ifdef USE_CUDA
    const Dtype* const output_base = output + output_off;
    Dtype* const col_base = col_data + col_shift;
    for (int grp = 0; grp < group_; ++grp) {
      // alpha = 1, beta = 0.
      caffe_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans,
          kernel_dim_ / group_, conv_out_spatial_dim_,
          conv_out_channels_ / group_,
          static_cast<Dtype>(1.),
          weights + weight_offset_ * grp,
          output_base + output_offset_ * grp,
          static_cast<Dtype>(0.),
          col_base + col_offset_ * grp);
    }
    if (!is_1x1_) {
      conv_col2im_gpu(col_data, input + input_off);
    }
#endif  // USE_CUDA
    return;
  }

#ifdef USE_GREENTEA
  for (int grp = 0; grp < group_; ++grp) {
    // Pointers are passed as cl_mem handles; positions are separate offsets.
    greentea_gpu_gemm<Dtype>(this->device_context_->id(),
        CblasTrans, CblasNoTrans,
        kernel_dim_ / group_, conv_out_spatial_dim_,
        conv_out_channels_ / group_,
        static_cast<Dtype>(1.),
        (cl_mem) weights, weight_offset_ * grp,
        (cl_mem) output, output_off + output_offset_ * grp,
        static_cast<Dtype>(0.),
        (cl_mem) col_data, col_shift + col_offset_ * grp);
  }
  if (!is_1x1_) {
    greentea_conv_col2im_gpu(col_data, 0, input, input_off);
  }
#endif  // USE_GREENTEA
}