Example #1
0
void BaseConvolutionLayer<Dtype>::backward_gpu_gemm(const Dtype* output,
    const Dtype* weights, Dtype* input) {
  // Gradient w.r.t. the bottom data: per group, col_buff = W^T * output,
  // then fold the column buffer back into image layout with col2im. For
  // 1x1 kernels the column layout equals the image layout, so the GEMM
  // writes straight into `input` and the col2im step is skipped.
  Dtype* col_buff = col_buffer_.mutable_gpu_data();
  if (is_1x1_) {
    col_buff = input;
  }
  for (int group_idx = 0; group_idx < group_; ++group_idx) {
    const Dtype* group_weights = weights + weight_offset_ * group_idx;
    const Dtype* group_output = output + output_offset_ * group_idx;
    Dtype* group_cols = col_buff + col_offset_ * group_idx;
    // (kernel_dim_ x out_channels/group) * (out_channels/group x spatial)
    caffe_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans, kernel_dim_,
        conv_out_spatial_dim_, conv_out_channels_ / group_, (Dtype)1.,
        group_weights, group_output, (Dtype)0., group_cols);
  }
  if (!is_1x1_) {
    conv_col2im_gpu(col_buff, input);
  }
}
Example #2
0
void BaseConvolutionNDLayer<Dtype>::backward_gpu_gemm(const Dtype* output,
    const Dtype* weights, Dtype* input) {
  // Gradient w.r.t. the bottom data: per group, col_buff = W^T * output,
  // then scatter the column buffer back to image layout via col2im. For
  // 1x1 kernels the column layout is the image layout, so the GEMM writes
  // straight into `input`. Only the CUDA backend is handled here.
  Dtype* col_buff = col_buffer_.mutable_gpu_data();
  if (is_1x1_) {
    col_buff = input;
  }
  if (this->device_context_->backend() == BACKEND_CUDA) {
#ifdef USE_CUDA
    for (int group_idx = 0; group_idx < group_; ++group_idx) {
      const Dtype* group_weights = weights + weight_offset_ * group_idx;
      const Dtype* group_output = output + output_offset_ * group_idx;
      Dtype* group_cols = col_buff + col_offset_ * group_idx;
      caffe_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans, kernel_dim_ / group_,
          conv_out_spatial_dim_, conv_out_channels_ / group_, (Dtype)1.,
          group_weights, group_output, (Dtype)0., group_cols);
    }
    if (!is_1x1_) {
      conv_col2im_gpu(col_buff, input);
    }
#endif  // USE_CUDA
  }
}
Example #3
0
void BaseConvolutionLayer<Dtype>::backward_gpu_gemm(const Dtype* output,
                                                    const int output_off,
                                                    const Dtype* weights,
                                                    Dtype* input,
                                                    const int input_off) {
  // Gradient w.r.t. the bottom data: per group, col_buff = W^T * output,
  // then col2im the column buffer back to image layout (skipped for 1x1
  // kernels, where the column layout equals the image layout).
  // output_off / input_off are element offsets into the output/input
  // buffers — presumably selecting one item of the batch; confirm against
  // the callers.
  Dtype* col_buff = col_buffer()->mutable_gpu_data();
  if (is_1x1_) {
    // 1x1: im2col is the identity, so the GEMM writes directly into input.
    col_buff = input;
  }
  if (this->device_context_->backend() == BACKEND_CUDA) {
#ifdef USE_CUDA
    for (int g = 0; g < group_; ++g) {
      // CUDA path: offsets are folded into the raw pointers.
      caffe_gpu_gemm<Dtype>(
          CblasTrans, CblasNoTrans, kernel_dim_ / group_, conv_out_spatial_dim_,
          conv_out_channels_ / group_, (Dtype) 1., weights + weight_offset_ * g,
          output + output_off + output_offset_ * g, (Dtype) 0.,
          // input_off only applies when writing straight into `input` (1x1);
          // the scratch column buffer is indexed from 0.
          col_buff + (is_1x1_ ? input_off : 0) + col_offset_ * g);
    }
    if (!is_1x1_) {
      conv_col2im_gpu(col_buff, input + input_off);
    }
#endif  // USE_CUDA
  } else {
#ifdef USE_GREENTEA
    // OpenCL (greentea) path: cl_mem handles cannot be pointer-offset on the
    // host, so each buffer is passed with an explicit element offset instead.
    for (int g = 0; g < group_; ++g) {
      greentea_gpu_gemm<Dtype>(this->device_context_->id(), CblasTrans,
                               CblasNoTrans, kernel_dim_ / group_,
                               conv_out_spatial_dim_,
                               conv_out_channels_ / group_, (Dtype) 1.,
                               (cl_mem) weights, weight_offset_ * g,
                               (cl_mem) output, output_off + output_offset_ * g,
                               (Dtype) 0., (cl_mem) col_buff,
                               (is_1x1_ ? input_off : 0) + col_offset_ * g);
    }
    if (!is_1x1_) {
      // col_buff read from offset 0; result scattered into input at input_off.
      greentea_conv_col2im_gpu(col_buff, 0, input, input_off);
    }
#endif  // USE_GREENTEA
  }
}