Example 1
template <typename Dtype>
void BaseConvolutionLayer<Dtype>::weight_gpu_gemm(const Dtype* input,
    const Dtype* output, Dtype* weights) {
  const Dtype* col_buff = input;
  if (!is_1x1_) {
    // Unroll the input into columns (im2col); 1x1 kernels can use the
    // input buffer directly.
    conv_im2col_gpu(input, col_buffer_.mutable_gpu_data());
    col_buff = col_buffer_.gpu_data();
  }
  for (int g = 0; g < group_; ++g) {
    // Weight gradient per group: weights += output * col_buff^T
    // (CblasTrans on the second operand, beta = 1 to accumulate).
    caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasTrans, conv_out_channels_ / group_,
        kernel_dim_, conv_out_spatial_dim_,
        (Dtype)1., output + output_offset_ * g, col_buff + col_offset_ * g,
        (Dtype)1., weights + weight_offset_ * g);
  }
}
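For context, the GEMM above computes the weight gradient per group as dW = dY * Xcol^T, with M = conv_out_channels_ / group_, N = kernel_dim_, and K = conv_out_spatial_dim_. Below is a minimal CPU sketch of that transposed product; the toy sizes, names, and naive triple loop are illustrative assumptions, not Caffe's implementation.

#include <cstdio>
#include <vector>

// Naive C = alpha * A * B^T + beta * C, mirroring the
// caffe_gpu_gemm(CblasNoTrans, CblasTrans, ...) call above.
// A: M x K (output gradient), B: N x K (im2col buffer),
// C: M x N (weight gradient). All buffers are row-major.
void gemm_nt(int M, int N, int K, float alpha,
             const float* A, const float* B, float beta, float* C) {
  for (int m = 0; m < M; ++m) {
    for (int n = 0; n < N; ++n) {
      float acc = 0.f;
      for (int k = 0; k < K; ++k) {
        acc += A[m * K + k] * B[n * K + k];  // B is read transposed
      }
      C[m * N + n] = alpha * acc + beta * C[m * N + n];
    }
  }
}

int main() {
  // Toy stand-ins for conv_out_channels_ / group_, kernel_dim_,
  // and conv_out_spatial_dim_.
  const int M = 2, N = 3, K = 4;
  std::vector<float> dY(M * K, 1.f);    // gradient w.r.t. the output
  std::vector<float> Xcol(N * K, 2.f);  // im2col'd input
  std::vector<float> dW(M * N, 0.f);    // gradient w.r.t. the weights
  gemm_nt(M, N, K, 1.f, dY.data(), Xcol.data(), 1.f, dW.data());
  std::printf("dW[0] = %g\n", dW[0]);   // 1 * 2 summed over K = 8
  return 0;
}

The beta = 1 in the real call means each invocation adds into the existing weight-gradient buffer instead of overwriting it, so repeated calls (e.g., once per image in the batch) accumulate.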
Example 2
template<typename Dtype>
void BaseConvolutionLayer<Dtype>::forward_gpu_gemm(const Dtype* input,
                                                   const int_tp input_off,
                                                   const Dtype* weights,
                                                   Dtype* output,
                                                   const int_tp output_off,
                                                   bool skip_im2col) {
  const Dtype* col_buff = input;
  if (this->device_->backend() == BACKEND_CUDA) {
#ifdef USE_CUDA
    if (!is_1x1_) {
      if (!skip_im2col) {
        // Unroll the input into the column buffer so the convolution
        // reduces to one GEMM per group.
        conv_im2col_gpu(input + input_off, col_buffer()->mutable_gpu_data());
      }
      col_buff = col_buffer()->gpu_data();
    }
    for (int_tp g = 0; g < group_; ++g) {
      // Forward pass per group: output = weights * col_buff
      // (no transpose, beta = 0 overwrites the output buffer).
      caffe_gpu_gemm<Dtype>(
          CblasNoTrans, CblasNoTrans, conv_out_channels_ / group_,
          conv_out_spatial_dim_, kernel_dim_, (Dtype) 1.,
          weights + weight_offset_ * g,
          col_buff + (is_1x1_ ? input_off : 0) + col_offset_ * g, (Dtype) 0.,
          output + output_off + output_offset_ * g);
    }
#endif  // USE_CUDA
  } else {
#ifdef USE_GREENTEA
    if (!is_1x1_) {
      if (!skip_im2col) {
        // Same im2col unrolling as the CUDA branch, via the GreenTea
        // (OpenCL) kernels.
        greentea_conv_im2col_gpu(input, input_off,
                                 col_buffer()->mutable_gpu_data(), 0);
      }
      col_buff = col_buffer()->gpu_data();
    }
    for (int_tp g = 0; g < group_; ++g) {
      // Same per-group GEMM, with cl_mem handles and explicit offsets
      // instead of pointer arithmetic.
      greentea_gpu_gemm<Dtype>(this->device_->id(), CblasNoTrans,
                               CblasNoTrans, conv_out_channels_ / group_,
                               conv_out_spatial_dim_, kernel_dim_,
                               (Dtype) 1., (cl_mem) weights, weight_offset_ * g,
                               (cl_mem) col_buff,
                               (is_1x1_ ? input_off : 0) + col_offset_ * g,
                               (Dtype) 0., (cl_mem) output,
                               output_off + output_offset_ * g);
    }
#endif  // USE_GREENTEA
  }
}
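The forward call is the same pattern without the transpose: per group, output = weights * col_buff, with M = conv_out_channels_ / group_, N = conv_out_spatial_dim_, and K = kernel_dim_, and beta = 0 so the output is overwritten rather than accumulated. A minimal sketch of that product for a single group follows; again the sizes and the naive loop are illustrative assumptions only.

#include <cstdio>
#include <vector>

// Naive C = A * B, mirroring caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, ...)
// with alpha = 1 and beta = 0. A: M x K weights, B: K x N im2col buffer,
// C: M x N output. All buffers are row-major.
void gemm_nn(int M, int N, int K,
             const float* A, const float* B, float* C) {
  for (int m = 0; m < M; ++m) {
    for (int n = 0; n < N; ++n) {
      float acc = 0.f;
      for (int k = 0; k < K; ++k) {
        acc += A[m * K + k] * B[k * N + n];
      }
      C[m * N + n] = acc;  // beta = 0: previous contents are discarded
    }
  }
}

int main() {
  // Toy stand-ins for conv_out_channels_ / group_, conv_out_spatial_dim_,
  // and kernel_dim_.
  const int M = 2, N = 5, K = 3;
  std::vector<float> W(M * K, 0.5f);    // filter weights
  std::vector<float> Xcol(K * N, 4.f);  // im2col'd input
  std::vector<float> Y(M * N);          // output feature map
  gemm_nn(M, N, K, W.data(), Xcol.data(), Y.data());
  std::printf("Y[0] = %g\n", Y[0]);     // 0.5 * 4 summed over K = 6
  return 0;
}

The weight_offset_ * g, col_offset_ * g, and output_offset_ * g terms step each GEMM to the g-th group's slice of the corresponding buffer; the GreenTea variant passes the same offsets as explicit arguments because cl_mem handles do not support pointer arithmetic.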
Example 3
template <typename Dtype>
void BaseConvolutionNDLayer<Dtype>::weight_gpu_gemm(const Dtype* input,
    const Dtype* output, Dtype* weights) {
  const Dtype* col_buff = input;
  if (this->device_context_->backend() == BACKEND_CUDA) {
#ifdef USE_CUDA
    if (!is_1x1_) {
      // Unroll the input into columns, as in Example 1.
      conv_im2col_gpu(input, col_buffer_.mutable_gpu_data());
      col_buff = col_buffer_.gpu_data();
    }
    for (int g = 0; g < group_; ++g) {
      // Same accumulating weight-gradient GEMM as Example 1; in this ND
      // variant kernel_dim_ spans all groups, hence kernel_dim_ / group_.
      caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasTrans,
          conv_out_channels_ / group_,
          kernel_dim_ / group_, conv_out_spatial_dim_,
          (Dtype)1., output + output_offset_ * g, col_buff + col_offset_ * g,
          (Dtype)1., weights + weight_offset_ * g);
    }
#endif  // USE_CUDA
  }
}