Example #1
inline void createPoolingDesc(cudnnPoolingDescriptor_t* pool,
    cudnnPoolingMode_t mode, int h, int w, int pad_h, int pad_w,
    int stride_h, int stride_w) {
  CUDNN_CHECK(cudnnCreatePoolingDescriptor(pool));
  CUDNN_CHECK(cudnnSetPooling2dDescriptor(*pool, mode, h, w, pad_h, pad_w,
          stride_h, stride_w));
}
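The CUDNN_CHECK macro used throughout these examples is not shown in the snippets. A minimal sketch, assuming all it needs to do is abort with the cuDNN status string on failure:

#include <cudnn.h>
#include <cstdio>
#include <cstdlib>

// Hypothetical error-checking macro: evaluates the cuDNN call once and
// aborts with the returned status string if it is not CUDNN_STATUS_SUCCESS.
#define CUDNN_CHECK(call)                                              \
  do {                                                                 \
    cudnnStatus_t status = (call);                                     \
    if (status != CUDNN_STATUS_SUCCESS) {                              \
      fprintf(stderr, "cuDNN error %s at %s:%d\n",                     \
              cudnnGetErrorString(status), __FILE__, __LINE__);        \
      abort();                                                         \
    }                                                                  \
  } while (0)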
Example #2
inline void createTensor4dDesc(cudnnTensorDescriptor_t* desc, Size size,
    Stride stride) {
  CUDNN_CHECK(cudnnCreateTensorDescriptor(desc));
  CUDNN_CHECK(cudnnSetTensor4dDescriptorEx(*desc, dataType<Dtype>::type,
          size.num(), size.channels(), size.height(), size.width(),
          stride.nstride(), stride.cstride(), stride.hstride(),
          stride.wstride()));
}
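The Size and Stride types here are project-specific and not shown. For a densely packed NCHW tensor the strides follow directly from the shape; a small illustrative helper (the names are assumptions, not part of the original code):

// Packed NCHW strides for a tensor of shape (n, c, h, w):
//   wstride = 1, hstride = w, cstride = h * w, nstride = c * h * w.
struct PackedStrides { int nstride, cstride, hstride, wstride; };

inline PackedStrides packedNchwStrides(int c, int h, int w) {
  PackedStrides s;
  s.wstride = 1;
  s.hstride = w;
  s.cstride = h * w;
  s.nstride = c * h * w;
  return s;
}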
Example #3
ConvBC01CuDNN<T>::ConvBC01CuDNN(int pad_y, int pad_x, int stride_y,
    int stride_x) : pad_y(pad_y), pad_x(pad_x), stride_y(stride_y),
    stride_x(stride_x), n_imgs(0), n_channels(0), n_filters(0), img_h(0),
    img_w(0), filter_h(0), filter_w(0), workspace_size(0) {
  CUDNN_CHECK(cudnnCreateTensorDescriptor(&imgs_desc));
  CUDNN_CHECK(cudnnCreateTensorDescriptor(&convout_desc));
  CUDNN_CHECK(cudnnCreateFilterDescriptor(&filters_desc));
  CUDNN_CHECK(cudnnCreateConvolutionDescriptor(&conv_desc));
}
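Each descriptor created here needs a matching destroy call when the object goes away. A hedged sketch of what the corresponding destructor could look like, assuming no other resources are owned:

template <typename T>
ConvBC01CuDNN<T>::~ConvBC01CuDNN() {
  // Release the descriptors allocated in the constructor.
  CUDNN_CHECK(cudnnDestroyTensorDescriptor(imgs_desc));
  CUDNN_CHECK(cudnnDestroyTensorDescriptor(convout_desc));
  CUDNN_CHECK(cudnnDestroyFilterDescriptor(filters_desc));
  CUDNN_CHECK(cudnnDestroyConvolutionDescriptor(conv_desc));
}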
Example #4
inline void createFilterDesc(cudnnFilterDescriptor_t* desc,
    int n, int c, int h, int w) {
  CUDNN_CHECK(cudnnCreateFilterDescriptor(desc));
#if CUDNN_VERSION_MIN(5, 0, 0)
  CUDNN_CHECK(cudnnSetFilter4dDescriptor(*desc, dataType<Dtype>::type,
      CUDNN_TENSOR_NCHW, n, c, h, w));
#else
  CUDNN_CHECK(cudnnSetFilter4dDescriptor_v4(*desc, dataType<Dtype>::type,
      CUDNN_TENSOR_NCHW, n, c, h, w));
#endif
}
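CUDNN_VERSION_MIN compares against the CUDNN_VERSION constant defined by cudnn.h. A minimal sketch of how such a guard is usually defined (Caffe ships an equivalent macro):

// True when the cuDNN headers are at least the requested version;
// cudnn.h defines CUDNN_VERSION as major * 1000 + minor * 100 + patch.
#define CUDNN_VERSION_MIN(major, minor, patch) \
    (CUDNN_VERSION >= ((major) * 1000 + (minor) * 100 + (patch)))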
Example #5
inline void setConvolutionDesc(cudnnConvolutionDescriptor_t* conv,
    cudnnTensorDescriptor_t bottom, cudnnFilterDescriptor_t filter,
    int pad_h, int pad_w, int stride_h, int stride_w) {
#if CUDNN_VERSION_MIN(6, 0, 0)
  CUDNN_CHECK(cudnnSetConvolution2dDescriptor(*conv,
      pad_h, pad_w, stride_h, stride_w, 1, 1, CUDNN_CROSS_CORRELATION,
      dataType<Dtype>::type));
#else
  CUDNN_CHECK(cudnnSetConvolution2dDescriptor(*conv,
      pad_h, pad_w, stride_h, stride_w, 1, 1, CUDNN_CROSS_CORRELATION));
#endif
}
Example #6
void ConvBC01CuDNN<T>::fprop(const T *imgs, const T *filters, int n_imgs,
    int n_channels, int n_filters, int img_h, int img_w, int filter_h,
    int filter_w, T *convout) {
  bool set_conv_desc = false;
  if (n_imgs != this->n_imgs || n_channels != this->n_channels ||
      img_h != this->img_h || img_w != this->img_w) {
    CUDNN_CHECK(cudnnSetTensor4dDescriptor(
        imgs_desc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, n_imgs, n_channels,
        img_h, img_w
    ));
    this->n_imgs = n_imgs;
    this->n_channels = n_channels;
    this->img_h = img_h;
    this->img_w = img_w;
    set_conv_desc = true;
  }
  if (n_filters != this->n_filters || n_channels != this->n_channels ||
      filter_h != this->filter_h || filter_w != this->filter_w) {
    CUDNN_CHECK(cudnnSetFilter4dDescriptor(
        filters_desc, CUDNN_DATA_FLOAT, n_filters, n_channels, filter_h,
        filter_w
    ));
    this->n_filters = n_filters;
    this->n_channels = n_channels;
    this->filter_h = filter_h;
    this->filter_w = filter_w;
    set_conv_desc = true;
  }
  if (set_conv_desc) {
    CUDNN_CHECK(cudnnSetConvolution2dDescriptor(
        conv_desc, pad_y, pad_x, stride_y, stride_x, 1, 1, CUDNN_CONVOLUTION
    ));
    int n, c, h, w;
    CUDNN_CHECK(cudnnGetConvolution2dForwardOutputDim(
      conv_desc, imgs_desc, filters_desc, &n, &c, &h, &w
    ));
    CUDNN_CHECK(cudnnSetTensor4dDescriptor(
        convout_desc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, n, c, h, w
    ));
    CUDNN_CHECK(cudnnGetConvolutionForwardAlgorithm(
        CUDNN::handle(), imgs_desc, filters_desc, conv_desc, convout_desc,
        CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT, WORKSPACE_LIMIT,
        &fwd_algo
    ));
    CUDNN_CHECK(cudnnGetConvolutionForwardWorkspaceSize(
        CUDNN::handle(), imgs_desc, filters_desc, conv_desc, convout_desc,
        fwd_algo, &workspace_size
    ));

  }
  void *workspace = NULL;
  if (workspace_size > 0) {
    workspace = CUDA::buffer(workspace_size);
  }
  CUDNN_CHECK(cudnnConvolutionForward(
      CUDNN::handle(), &CUDNN::one, imgs_desc, imgs, filters_desc, filters,
      conv_desc, fwd_algo, workspace, workspace_size, &CUDNN::zero,
      convout_desc, convout
  ));
}
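cudnnGetConvolutionForwardAlgorithm with a workspace limit, as used above, was deprecated and later removed; cuDNN 7 introduced a variant that returns a ranked list of candidates instead. A hedged sketch of the replacement call with the same descriptors, assuming cuDNN 7 or later:

// Query all forward algorithms, ranked by expected performance.
cudnnConvolutionFwdAlgoPerf_t perf[CUDNN_CONVOLUTION_FWD_ALGO_COUNT];
int returned = 0;
CUDNN_CHECK(cudnnGetConvolutionForwardAlgorithm_v7(
    CUDNN::handle(), imgs_desc, filters_desc, conv_desc, convout_desc,
    CUDNN_CONVOLUTION_FWD_ALGO_COUNT, &returned, perf));
// perf[0].algo is the fastest candidate and perf[0].memory its workspace need;
// pick the first entry whose memory fits the workspace budget.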
Example #7
void ConvBC01CuDNN<T>::bprop(const T* imgs, const T* filters,
                             const T *convout_d, T *imgs_d, T *filters_d) {
  if (filters_d) {
    CUDNN_CHECK(cudnnConvolutionBackwardFilter(
      CUDNN::handle(), &CUDNN::one, imgs_desc, imgs, convout_desc, convout_d,
      conv_desc, &CUDNN::zero, filters_desc, filters_d
    ));
  }
  if (imgs_d) {
    CUDNN_CHECK(cudnnConvolutionBackwardData(
      CUDNN::handle(), &CUDNN::one, filters_desc, filters, convout_desc,
      convout_d, conv_desc, &CUDNN::zero, imgs_desc, imgs_d
    ));
  }
}
Example #8
void CuDNNConvolutionLayer<Dtype>::LayerSetUp(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  ConvolutionLayer<Dtype>::LayerSetUp(bottom, top);
  // Initialize CUDA streams and cuDNN.
  stream_         = new cudaStream_t[this->group_ * CUDNN_STREAMS_PER_GROUP];
  handle_         = new cudnnHandle_t[this->group_ * CUDNN_STREAMS_PER_GROUP];
  workspaceSizeInBytes = 0;
  workspace = NULL;

  for (int g = 0; g < this->group_ * CUDNN_STREAMS_PER_GROUP; g++) {
    CUDA_CHECK(cudaStreamCreate(&stream_[g]));
    CUDNN_CHECK(cudnnCreate(&handle_[g]));
    CUDNN_CHECK(cudnnSetStream(handle_[g], stream_[g]));
  }

  // Set the indexing parameters.
  weight_offset_ = (this->num_output_ / this->group_)
      * (this->channels_ / this->group_) * this->kernel_h_ * this->kernel_w_;
  bias_offset_ = (this->num_output_ / this->group_);

  // Create filter descriptor.
  cudnn::createFilterDesc<Dtype>(&filter_desc_,
      this->num_output_ / this->group_, this->channels_ / this->group_,
      this->kernel_h_, this->kernel_w_);

  // Create tensor descriptor(s) for data and corresponding convolution(s).
  for (int i = 0; i < bottom.size(); i++) {
    cudnnTensorDescriptor_t bottom_desc;
    cudnn::createTensor4dDesc<Dtype>(&bottom_desc);
    bottom_descs_.push_back(bottom_desc);
    cudnnTensorDescriptor_t top_desc;
    cudnn::createTensor4dDesc<Dtype>(&top_desc);
    top_descs_.push_back(top_desc);
    cudnnConvolutionDescriptor_t conv_desc;
    cudnn::createConvolutionDesc<Dtype>(&conv_desc);
    conv_descs_.push_back(conv_desc);
  }

  // Tensor descriptor for bias.
  if (this->bias_term_) {
    cudnn::createTensor4dDesc<Dtype>(&bias_desc_);
  }

  handles_setup_ = true;
}
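Every stream, handle and descriptor created in this LayerSetUp has to be released again. A hedged sketch of the matching teardown, following the member names used above (essentially the destructor Caffe pairs with this setup):

CuDNNConvolutionLayer<Dtype>::~CuDNNConvolutionLayer() {
  // Check that handles have been set up before destroying.
  if (!handles_setup_) { return; }

  for (int i = 0; i < bottom_descs_.size(); i++) {
    cudnnDestroyTensorDescriptor(bottom_descs_[i]);
    cudnnDestroyTensorDescriptor(top_descs_[i]);
    cudnnDestroyConvolutionDescriptor(conv_descs_[i]);
  }
  if (this->bias_term_) {
    cudnnDestroyTensorDescriptor(bias_desc_);
  }
  cudnnDestroyFilterDescriptor(filter_desc_);

  for (int g = 0; g < this->group_ * CUDNN_STREAMS_PER_GROUP; g++) {
    cudaStreamDestroy(stream_[g]);
    cudnnDestroy(handle_[g]);
  }

  delete [] stream_;
  delete [] handle_;
  // Any workspace buffer allocated later would also be freed here.
}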
Example #9
void PoolBC01CuDNN<T>::bprop(const T *imgs, const T* poolout,
                             const T *poolout_d, T *imgs_d) {
  CUDNN_CHECK(cudnnPoolingBackward(
    CUDNN::handle(), pool_desc, &CUDNN::one, poolout_desc, poolout,
    poolout_desc, poolout_d, imgs_desc, imgs, &CUDNN::zero, imgs_desc, imgs_d
  ));
}
Example #10
inline void setTensorNdDesc(cudnnTensorDescriptor_t* desc,
    const int_tp total_dims,
    const int_tp* shape, const int_tp* stride) {

  // Pad to at least 4 dimensions
  int_tp cudnn_dims = std::max(total_dims, (int_tp)4);
  int_tp padding = std::max((int_tp)0, cudnn_dims - total_dims);

  std::vector<int> shape_int(cudnn_dims);
  std::vector<int> stride_int(cudnn_dims);

  for (int_tp i = cudnn_dims - 1; i >= 0; --i) {
    if (i < padding) {
      shape_int[i] = 1;
      stride_int[i] = shape_int[i + 1] * stride_int[i + 1];
    } else {
      shape_int[i] = shape[i - padding];
      stride_int[i] = stride[i - padding];
    }
  }

  const int* shape_ptr = &shape_int[0];
  const int* stride_ptr = &stride_int[0];

  CUDNN_CHECK(
      cudnnSetTensorNdDescriptor(*desc, dataType<Dtype>::type, cudnn_dims,
                                 shape_ptr, stride_ptr));
}
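As a concrete illustration of the front padding: a 3-D tensor of shape (2, 3, 5) with packed strides (15, 5, 1) is promoted to a 4-D descriptor. A hedged usage sketch, assuming the helper is templated on Dtype like the rest of these wrappers:

cudnnTensorDescriptor_t desc;
CUDNN_CHECK(cudnnCreateTensorDescriptor(&desc));
const int_tp shape[3]  = {2, 3, 5};
const int_tp stride[3] = {15, 5, 1};   // packed strides for (2, 3, 5)
setTensorNdDesc<float>(&desc, 3, shape, stride);
// The descriptor now describes shape {1, 2, 3, 5} with strides {30, 15, 5, 1}:
// the loop prepends a unit dimension whose stride is shape[0] * stride[0].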
Example #11
void Activation::compute_gpu(const vector<bool>& add) {
  DTYPE alpha = 1.;
  DTYPE beta = add[0] ? 1. : 0.;
  CUDNN_CHECK(cudnnActivationForward(cudnn_handle(), activation_mode_,
          &alpha, bottom_desc_, inputs_[0]->gpu_data(), &beta, top_desc_,
          outputs_[0]->mutable_gpu_data()));
}
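This uses the pre-5.0 cudnnActivationForward signature, which takes a cudnnActivationMode_t directly. Since cuDNN 5 the call takes an activation descriptor instead; a hedged sketch of the newer form with the same member names (CUDNN_ACTIVATION_RELU stands in for whatever activation_mode_ held):

// Sketch assuming cuDNN >= 5: the mode is wrapped in a descriptor.
cudnnActivationDescriptor_t activ_desc;
CUDNN_CHECK(cudnnCreateActivationDescriptor(&activ_desc));
CUDNN_CHECK(cudnnSetActivationDescriptor(activ_desc, CUDNN_ACTIVATION_RELU,
    CUDNN_PROPAGATE_NAN, 0.0));
CUDNN_CHECK(cudnnActivationForward(cudnn_handle(), activ_desc,
    &alpha, bottom_desc_, inputs_[0]->gpu_data(), &beta, top_desc_,
    outputs_[0]->mutable_gpu_data()));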
Example #12
void CuDNNPoolingLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  PoolingLayer<Dtype>::LayerSetUp(bottom, top);

  // kernel shape
  const int* kernel_shape_data = this->kernel_shape_.cpu_data();
  // stride
  const int* stride_data = this->stride_.cpu_data();
  // padding
  const int* pad_data = this->pad_.cpu_data();

  int kernel_shape[this->num_spatial_axes_];
  int stride[this->num_spatial_axes_];
  int pad[this->num_spatial_axes_];
  for (int i = 0; i < this->num_spatial_axes_; i++) {
    kernel_shape[i] = kernel_shape_data[i];
    stride[i] = stride_data[i];
    pad[i] = pad_data[i];
  }

  CUDNN_CHECK(cudnnCreate(&handle_));

  cudnn::createTensorDesc<Dtype>(&bottom_desc_);
  cudnn::createTensorDesc<Dtype>(&top_desc_);
  cudnn::createPoolingNdDesc<Dtype>(&pooling_desc_,
      this->layer_param_.pooling_param().pool(), &mode_,
      this->num_spatial_axes_, kernel_shape,
      pad, stride);
  handles_setup_ = true;
}
Example #13
void Softmax::compute_gpu(const vector<bool>& add) {
  DTYPE alpha = 1.;
  DTYPE beta = add[0] ? 1. : 0.;
  CUDNN_CHECK(cudnnSoftmaxForward(cudnn_handle(), CUDNN_SOFTMAX_ACCURATE,
          softmax_mode_, &alpha, bottom_desc_, inputs_[0]->gpu_data(), &beta,
          top_desc_, outputs_[0]->mutable_gpu_data()));
}
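The backward pass mirrors this call. A hedged sketch, assuming the gradients live in diff buffers analogous to the data buffers used above:

// Sketch: back-propagate through the softmax computed above.
CUDNN_CHECK(cudnnSoftmaxBackward(cudnn_handle(), CUDNN_SOFTMAX_ACCURATE,
        softmax_mode_, &alpha, top_desc_, outputs_[0]->gpu_data(),
        top_desc_, outputs_[0]->gpu_diff(), &beta,
        bottom_desc_, inputs_[0]->mutable_gpu_diff()));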
Example #14
void Context::Init(int device_id) {
  device_id_ = device_id;

  SwitchDevice();

#if defined(USE_CUDA)
  if (blas_handle_ == nullptr) {
    CUBLAS_CHECK(cublasCreate((cublasHandle_t*)&blas_handle_));
    CHECK_NOTNULL(blas_handle_);
  }
#endif

#if defined(USE_CUDNN)
  if (cudnn_handle_ == nullptr) {
    CUDNN_CHECK(cudnnCreate((cudnnHandle_t*)&cudnn_handle_));
    CHECK_NOTNULL(cudnn_handle_);
  }
#endif

#if defined(USE_NNPACK)
  if (nnpack_handle_ == nullptr) {
    CHECK_EQ(nnp_initialize(), nnp_status_success);
    nnpack_handle_ = pthreadpool_create(0);
    CHECK_NOTNULL(nnpack_handle_);
  }
#endif
}
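A hedged sketch of the symmetric shutdown path such a context would need, using the same members and build flags (the method name is illustrative):

void Context::Clear() {
#if defined(USE_CUDA)
  if (blas_handle_ != nullptr) {
    CUBLAS_CHECK(cublasDestroy((cublasHandle_t)blas_handle_));
    blas_handle_ = nullptr;
  }
#endif

#if defined(USE_CUDNN)
  if (cudnn_handle_ != nullptr) {
    CUDNN_CHECK(cudnnDestroy((cudnnHandle_t)cudnn_handle_));
    cudnn_handle_ = nullptr;
  }
#endif

#if defined(USE_NNPACK)
  if (nnpack_handle_ != nullptr) {
    pthreadpool_destroy((pthreadpool_t)nnpack_handle_);
    CHECK_EQ(nnp_deinitialize(), nnp_status_success);
    nnpack_handle_ = nullptr;
  }
#endif
}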
Example #15
void CuDNNLRNLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
                                      const vector<Blob<Dtype>*>& top) {
    LRNLayer<Dtype>::LayerSetUp(bottom, top);

    CUDNN_CHECK(cudnnCreate(&handle_));
    CUDNN_CHECK(cudnnCreateLRNDescriptor(&norm_desc_));
    cudnn::createTensor4dDesc<Dtype>(&bottom_desc_);
    cudnn::createTensor4dDesc<Dtype>(&top_desc_);

    // create a LRN handle
    handles_setup_ = true;

    size_ = this->layer_param().lrn_param().local_size();
    alpha_ = this->layer_param().lrn_param().alpha();
    beta_ = this->layer_param().lrn_param().beta();
    k_ = this->layer_param().lrn_param().k();
}
Example #16
inline void createPoolingDesc(cudnnPoolingDescriptor_t* pool_desc,
    PoolingParameter_PoolMethod poolmethod, cudnnPoolingMode_t* mode,
    int h, int w, int stride_h, int stride_w) {
  switch (poolmethod) {
  case PoolingParameter_PoolMethod_MAX:
    *mode = CUDNN_POOLING_MAX;
    break;
  case PoolingParameter_PoolMethod_AVE:
    *mode = CUDNN_POOLING_AVERAGE;
    break;
  default:
    LOG(FATAL) << "Unknown pooling method.";
  }
  CUDNN_CHECK(cudnnCreatePoolingDescriptor(pool_desc));
  CUDNN_CHECK(cudnnSetPoolingDescriptor(*pool_desc, *mode, h, w,
        stride_h, stride_w));
}
Example #17
inline void createPoolingDesc(cudnnPoolingDescriptor_t* pool_desc,
    PoolingParameter_PoolMethod poolmethod, cudnnPoolingMode_t* mode,
    int h, int w, int pad_h, int pad_w, int stride_h, int stride_w) {
  switch (poolmethod) {
  case PoolingParameter_PoolMethod_MAX:
    *mode = CUDNN_POOLING_MAX;
    break;
  case PoolingParameter_PoolMethod_AVE:
    *mode = CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING;
    break;
  default:
    LOG(FATAL) << "Unknown pooling method.";
  }
  CUDNN_CHECK(cudnnCreatePoolingDescriptor(pool_desc));
  CUDNN_CHECK(cudnnSetPooling2dDescriptor(*pool_desc, *mode, h, w,
        pad_h, pad_w, stride_h, stride_w));
}
Example #18
void PoolBC01CuDNN<T>::fprop(const T *imgs, int *imgs_shape, T *poolout) {
  bool new_shape = false;
  int n_imgs_dims = n_img_dims + 2;
  for (int i = 0; i < n_imgs_dims; ++i) {
    if (this->imgs_shape[i] != imgs_shape[i]) {
      new_shape = true;
      break;
    }
  }

  if (new_shape) {
    for (int i = 0; i < n_imgs_dims; ++i) {
      this->imgs_shape[i] = imgs_shape[i];
    }
    int imgs_strides[n_imgs_dims];
    array_strides(n_imgs_dims, imgs_shape, imgs_strides);
    CUDNN_CHECK(cudnnSetTensorNdDescriptor(
        imgs_desc, CUDNN_DATA_FLOAT, n_imgs_dims, imgs_shape, imgs_strides
    ));

    CUDNN_CHECK(cudnnSetPoolingNdDescriptor(
        pool_desc, pool_mode, n_img_dims, win_shape, padding, strides
    ));

    int poolout_shape[n_imgs_dims];
    poolout_shape[0] = imgs_shape[0];
    poolout_shape[1] = imgs_shape[1];
    for (int i = 0; i < n_img_dims; ++i) {
      poolout_shape[i+2] = (imgs_shape[i+2] + 2*padding[i] - win_shape[i])
                           / strides[i] + 1;
    }

    int poolout_strides[n_imgs_dims];
    array_strides(n_imgs_dims, poolout_shape, poolout_strides);
    CUDNN_CHECK(cudnnSetTensorNdDescriptor(
        poolout_desc, CUDNN_DATA_FLOAT, n_imgs_dims, poolout_shape,
        poolout_strides
    ));
  }

  CUDNN_CHECK(cudnnPoolingForward(
      CUDNN::handle(), pool_desc, &CUDNN::one, imgs_desc, imgs, &CUDNN::zero,
      poolout_desc, poolout
  ));
}
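The cudnnSetPoolingNdDescriptor call above uses the cuDNN 4 argument list; from cuDNN 5 the function also takes a NaN-propagation flag. A hedged sketch of the newer call with the same arguments:

// Sketch assuming cuDNN >= 5: an extra cudnnNanPropagation_t argument.
CUDNN_CHECK(cudnnSetPoolingNdDescriptor(
    pool_desc, pool_mode, CUDNN_PROPAGATE_NAN, n_img_dims, win_shape,
    padding, strides
));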
Example #19
void CuDNNSoftmaxLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  SoftmaxLayer<Dtype>::LayerSetUp(bottom, top);
  // Initialize CUDNN.
  CUDNN_CHECK(cudnnCreate(&handle_));
  cudnn::createTensor4dDesc<Dtype>(&bottom_desc_);
  cudnn::createTensor4dDesc<Dtype>(&top_desc_);
  handles_setup_ = true;
}
Example #20
void CuDNNReLULayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  ReLULayer<Dtype>::LayerSetUp(bottom, top);
  // initialize cuDNN
  CUDNN_CHECK(cudnnCreate(&handle_));
  cudnn::createTensorNdDesc<Dtype>(&bottom_desc_);
  cudnn::createTensorNdDesc<Dtype>(&top_desc_);
  handles_setup_ = true;
}
Example #21
void CuDNNLRNLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
                                   const vector<Blob<Dtype>*>& top) {
    LRNLayer<Dtype>::Reshape(bottom, top);
    cudnn::setTensor4dDesc<Dtype>(&bottom_desc_, bottom[0]->num(),
                                  this->channels_, this->height_, this->width_);
    cudnn::setTensor4dDesc<Dtype>(&top_desc_, bottom[0]->num(),
                                  this->channels_, this->height_, this->width_);
    CUDNN_CHECK(cudnnSetLRNDescriptor(norm_desc_, size_, alpha_, beta_, k_));
}
Example #22
void CuDNNTanHLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  TanHLayer<Dtype>::LayerSetUp(bottom, top);
  // initialize cuDNN
  CUDNN_CHECK(cudnnCreate(&handle_));
  cudnn::createTensorDesc<Dtype>(&bottom_desc_);
  cudnn::createTensorDesc<Dtype>(&top_desc_);
  cudnn::createActivationDescriptor<Dtype>(&activ_desc_, CUDNN_ACTIVATION_TANH);
  handles_setup_ = true;
}
Example #23
void CuDNNReLULayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  ReLULayer<Dtype>::LayerSetUp(bottom, top);
  // initialize cuDNN
  CUDNN_CHECK(cudnnCreate(&handle_));
  cudnn::createTensor4dDesc<Dtype>(&bottom_desc_);
  cudnn::createTensor4dDesc<Dtype>(&top_desc_);
  handles_setup_ = true;
  CUDNN_CHECK(cudnnCreateActivationDescriptor(&activation_desc_));
  CUDNN_CHECK(cudnnSetActivationDescriptor(activation_desc_,
      CUDNN_ACTIVATION_RELU, CUDNN_PROPAGATE_NAN, 0.0));
}
Example #24
inline void createFilterDesc(cudnnFilterDescriptor_t* desc,
    const int_tp num_spatial_dims,
    const int_tp n, const int_tp c, const int_tp* shape) {

  std::vector<int> shape_int(num_spatial_dims + 2);

  shape_int[0] = n;
  shape_int[1] = c;

  for (int_tp i = 0; i < num_spatial_dims; ++i) {
    shape_int[2+i] = shape[i];
  }

  const int* shape_ptr = &shape_int[0];

  CUDNN_CHECK(cudnnCreateFilterDescriptor(desc));
  CUDNN_CHECK(cudnnSetFilterNdDescriptor(*desc, dataType<Dtype>::type,
                                         num_spatial_dims + 2,
                                         shape_ptr));
}
Example #25
PoolBC01CuDNN<T>::PoolBC01CuDNN(int n_img_dims, int *win_shape, int *padding,
    int *strides, PoolMode pool_mode) : n_img_dims(n_img_dims) {
  if (n_img_dims > MAX_IMG_DIMS) {
    throw std::runtime_error("More than 3 image dimensions.");
  }

  for (int i = 0; i < n_img_dims; ++i) {
    this->win_shape[i] = win_shape[i];
    this->padding[i] = padding[i];
    this->strides[i] = strides[i];
  }
  for (int i = 0; i < n_img_dims + 2; ++i) {
    imgs_shape[i] = -1;
  }
  this->pool_mode = pool_mode == POOL_MAX ? CUDNN_POOLING_MAX :
      CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING;
  CUDNN_CHECK(cudnnCreateTensorDescriptor(&imgs_desc));
  CUDNN_CHECK(cudnnCreateTensorDescriptor(&poolout_desc));
  CUDNN_CHECK(cudnnCreatePoolingDescriptor(&pool_desc));
}
Example #26
void CuDNNPoolingLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  PoolingLayer<Dtype>::LayerSetUp(bottom, top);
  CUDNN_CHECK(cudnnCreate(&handle_));
  cudnn::createTensorDesc<Dtype>(&bottom_desc_);
  cudnn::createTensorDesc<Dtype>(&top_desc_);
  cudnn::createNdPoolingDesc<Dtype>(&pooling_desc_,
      this->layer_param_.pooling_param().pool(), &mode_,
      this->kernel_shape_, this->pad_, this->stride_);
  handles_setup_ = true;
}
Example #27
void CuDNNTanHLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      vector<Blob<Dtype>*>* top) {
  TanHLayer<Dtype>::LayerSetUp(bottom, top);
  // initialize cuDNN
  CUDNN_CHECK(cudnnCreate(&handle_));
  const int N = bottom[0]->num();
  const int K = bottom[0]->channels();
  const int H = bottom[0]->height();
  const int W = bottom[0]->width();
  cudnn::createTensor4dDesc<Dtype>(&bottom_desc_, N, K, H, W);
  cudnn::createTensor4dDesc<Dtype>(&top_desc_, N, K, H, W);
}
Example #28
void ConvBC01CuDNN<T>::bprop(const T* imgs, const T* filters,
                             const T *convout_d, T *imgs_d, T *filters_d) {
  void *workspace = NULL;
  if (workspace_size > 0) {
    workspace = CUDA::buffer(workspace_size);
  }
  if (filters_d) {
    CUDNN_CHECK(cudnnConvolutionBackwardFilter(
      CUDNN::handle(), &CUDNN::one, imgs_desc, imgs, convout_desc, convout_d,
      conv_desc, bwd_filters_algo, workspace, workspace_size, &CUDNN::zero,
      filters_desc, filters_d
    ));
  }
  if (imgs_d) {
    CUDNN_CHECK(cudnnConvolutionBackwardData(
      CUDNN::handle(), &CUDNN::one, filters_desc, filters, convout_desc,
      convout_d, conv_desc, bwd_imgs_algo, workspace, workspace_size,
      &CUDNN::zero, imgs_desc, imgs_d
    ));
  }
}
Example #29
void CuDNNPoolingLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  PoolingLayer<Dtype>::LayerSetUp(bottom, top);
  // Sanity check: CUDNN currently only supports pad == 0.
  CHECK_EQ(this->pad_h_, 0);
  CHECK_EQ(this->pad_w_, 0);
  CUDNN_CHECK(cudnnCreate(&handle_));
  cudnn::createTensor4dDesc<Dtype>(&bottom_desc_);
  cudnn::createTensor4dDesc<Dtype>(&top_desc_);
  cudnn::createPoolingDesc<Dtype>(&pooling_desc_,
      this->layer_param_.pooling_param().pool(), &mode_,
      this->kernel_h_, this->kernel_w_, this->stride_h_, this->stride_w_);
  handles_setup_ = true;
}
Example #30
void CuDNNTanHLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  TanHLayer<Dtype>::LayerSetUp(bottom, top);
  // initialize cuDNN
  CUDNN_CHECK(cudnnCreate(&handle_));
  cudnn::createTensor4dDesc<Dtype>(&bottom_desc_);
  cudnn::createTensor4dDesc<Dtype>(&top_desc_);
#if CUDNN_VERSION_MIN(5, 0, 0)
  CUDNN_CHECK(cudnnCreateActivationDescriptor(&activation_desc_));
  CUDNN_CHECK(cudnnSetActivationDescriptor(activation_desc_,
      CUDNN_ACTIVATION_TANH, CUDNN_PROPAGATE_NAN, 0));
#endif
  handles_setup_ = true;
}