Example #1
CuDNNConvolutionLayer<Dtype>::~CuDNNConvolutionLayer() {
  // Check that handles have been setup before destroying.
  if (!handles_setup_) { return; }

  for (int_tp i = 0; i < bottom_descs_.size(); i++) {
    cudnnDestroyTensorDescriptor(bottom_descs_[i]);
    cudnnDestroyTensorDescriptor(top_descs_[i]);
    cudnnDestroyConvolutionDescriptor(conv_descs_[i]);
  }
  if (this->bias_term_) {
    cudnnDestroyTensorDescriptor(bias_desc_);
  }
  cudnnDestroyFilterDescriptor(filter_desc_);

  for (int_tp g = 0; g < this->group_ * CUDNN_STREAMS_PER_GROUP; g++) {
    cudaStreamDestroy(stream_[g]);
    cudnnDestroy(handle_[g]);
  }

  cudaFree(workspaceData);
  delete [] stream_;
  delete [] handle_;
  delete [] fwd_algo_;
  delete [] bwd_filter_algo_;
  delete [] bwd_data_algo_;
  delete [] workspace_fwd_sizes_;
  delete [] workspace_bwd_data_sizes_;
  delete [] workspace_bwd_filter_sizes_;
}
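This destructor only releases what the layer's setup created, and the handles_setup_ guard keeps it from destroying descriptors that were never initialized. For reference, a minimal self-contained sketch of the matching create/destroy pairing (names such as kNumStreams and checkCudnn are illustrative, not taken from the example above) might look like this:

#include <cudnn.h>
#include <cuda_runtime.h>
#include <cstdio>
#include <cstdlib>
#include <vector>

// Illustrative error helper; real projects usually wrap this in a macro.
static void checkCudnn(cudnnStatus_t status) {
  if (status != CUDNN_STATUS_SUCCESS) {
    std::fprintf(stderr, "cuDNN error: %s\n", cudnnGetErrorString(status));
    std::exit(EXIT_FAILURE);
  }
}

int main() {
  const int kNumStreams = 3;  // hypothetical stream/handle count
  std::vector<cudaStream_t> streams(kNumStreams);
  std::vector<cudnnHandle_t> handles(kNumStreams);
  cudnnTensorDescriptor_t bottom_desc, top_desc;
  cudnnFilterDescriptor_t filter_desc;
  cudnnConvolutionDescriptor_t conv_desc;

  // Setup: every object created here is destroyed below.
  for (int i = 0; i < kNumStreams; ++i) {
    cudaStreamCreate(&streams[i]);
    checkCudnn(cudnnCreate(&handles[i]));
    checkCudnn(cudnnSetStream(handles[i], streams[i]));
  }
  checkCudnn(cudnnCreateTensorDescriptor(&bottom_desc));
  checkCudnn(cudnnCreateTensorDescriptor(&top_desc));
  checkCudnn(cudnnCreateFilterDescriptor(&filter_desc));
  checkCudnn(cudnnCreateConvolutionDescriptor(&conv_desc));

  // Teardown mirrors the destructor above: descriptors first, then handles and streams.
  checkCudnn(cudnnDestroyConvolutionDescriptor(conv_desc));
  checkCudnn(cudnnDestroyFilterDescriptor(filter_desc));
  checkCudnn(cudnnDestroyTensorDescriptor(top_desc));
  checkCudnn(cudnnDestroyTensorDescriptor(bottom_desc));
  for (int i = 0; i < kNumStreams; ++i) {
    checkCudnn(cudnnDestroy(handles[i]));
    cudaStreamDestroy(streams[i]);
  }
  return 0;
}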
Example #2
CuDNNReLULayer<Dtype>::~CuDNNReLULayer() {
  // Check that handles have been setup before destroying.
  if (!handles_setup_) { return; }

  cudnnDestroyTensorDescriptor(this->bottom_desc_);
  cudnnDestroyTensorDescriptor(this->top_desc_);
  cudnnDestroy(this->handle_);
}
Example #3
CuDNNSoftmaxLayer<Dtype, MItype, MOtype>::~CuDNNSoftmaxLayer() {
  // Check that handles have been setup before destroying.
  if (!handles_setup_) { return; }

  cudnnDestroyTensorDescriptor(bottom_desc_);
  cudnnDestroyTensorDescriptor(top_desc_);
  cudnnDestroy(handle_);
}
Example #4
GpuDevice::Impl::~Impl() {
  ActivateDevice();
  for (size_t i = 0; i < kParallelism; ++i) {
    CUDNN_CALL(cudnnDestroy(cudnn_handle[i]));
    CUBLAS_CALL(cublasDestroy(cublas_handle[i]));
    CUDA_CALL(cudaStreamDestroy(stream[i]));
  }
}
Example #5
CuDNNPoolingLayer<Dtype>::~CuDNNPoolingLayer() {
  // Check that handles have been setup before destroying.
  if (!handles_setup_) { return; }

  cudnnDestroyTensor4dDescriptor(bottom_desc_);
  cudnnDestroyTensor4dDescriptor(top_desc_);
  cudnnDestroyPoolingDescriptor(pooling_desc_);
  cudnnDestroy(handle_);
}
Example #6
GpuDevice::~GpuDevice() {
  CUDA_CALL(cudaSetDevice(device_));
  pool_.WaitForAllFinished();
  for (size_t i = 0; i < kParallelism; ++i) {
    CUDNN_CALL(cudnnDestroy(cudnn_handle_[i]));
    CUBLAS_CALL(cublasDestroy(cublas_handle_[i]));
    CUDA_CALL(cudaStreamDestroy(stream_[i]));
  }
  delete data_store_;
}
Example #7
CuDNNTanHLayer<Dtype>::~CuDNNTanHLayer() {
  // Check that handles have been setup before destroying.
  if (!handles_setup_) { return; }

  cudnnDestroyTensorDescriptor(this->bottom_desc_);
  cudnnDestroyTensorDescriptor(this->top_desc_);
#if CUDNN_VERSION_MIN(5, 0, 0)
  cudnnDestroyActivationDescriptor(this->activation_desc_);
#endif
  cudnnDestroy(this->handle_);
}
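The CUDNN_VERSION_MIN(5, 0, 0) guard is there because activation descriptors only exist from cuDNN 5 onward; older builds have nothing to destroy. In Caffe-style code the macro is typically defined roughly as follows (a sketch, not necessarily the exact definition used by this project):

#include <cudnn.h>

// cudnn.h defines CUDNN_VERSION as major*1000 + minor*100 + patchlevel,
// so a simple comparison implements the "at least this version" check.
#define CUDNN_VERSION_MIN(major, minor, patch) \
  (CUDNN_VERSION >= ((major) * 1000 + (minor) * 100 + (patch)))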
Example #8
CuDNNConvolutionLayer<Dtype>::~CuDNNConvolutionLayer() {
  for (int i = 0; i < bottom_descs_.size(); i++) {
    cudnnDestroyTensor4dDescriptor(bottom_descs_[i]);
    cudnnDestroyTensor4dDescriptor(top_descs_[i]);
    cudnnDestroyConvolutionDescriptor(conv_descs_[i]);
  }
  if (this->bias_term_) {
    cudnnDestroyTensor4dDescriptor(bias_desc_);
  }
  cudnnDestroyFilterDescriptor(filter_desc_);

  for (int g = 0; g < this->group_ * CUDNN_STREAMS_PER_GROUP; g++) {
    cudaStreamDestroy(stream_[g]);
    cudnnDestroy(handle_[g]);
  }

  delete [] stream_;
  delete [] handle_;
}
Example #9
void Context::Clear() {
#if defined(USE_CUDA)
  if (blas_handle_ != nullptr) {
    CUBLAS_CHECK(cublasDestroy(cublasHandle_t(blas_handle_)));
    blas_handle_ = nullptr;
  }
#endif

#if defined(USE_CUDNN)
  if (cudnn_handle_ != nullptr) {
    CUDNN_CHECK(cudnnDestroy(cudnnHandle_t(cudnn_handle_)));
    cudnn_handle_ = nullptr;
  }
#endif

#if defined(USE_NNPACK)
  if (nnpack_handle_ != nullptr) {
    CHECK_EQ(nnp_deinitialize(), nnp_status_success);
    pthreadpool_destroy(pthreadpool_t(nnpack_handle_));
    nnpack_handle_ = nullptr;
  }
#endif
}
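Clear() follows a check / destroy / reset-to-null pattern, so it is safe to call more than once and safe to call on a context that never created a handle. A minimal stand-alone sketch of the same idea for a single cuDNN handle (the CudnnHandleHolder name is illustrative, not part of the class above):

#include <cudnn.h>

// Illustrative holder; not part of the Context class above.
struct CudnnHandleHolder {
  cudnnHandle_t handle = nullptr;  // cudnnHandle_t is a pointer type

  void EnsureCreated() {
    if (handle == nullptr) {
      cudnnCreate(&handle);  // return status omitted for brevity
    }
  }

  // Idempotent: destroy only if present, then reset so a second call is a no-op.
  void Clear() {
    if (handle != nullptr) {
      cudnnDestroy(handle);
      handle = nullptr;
    }
  }

  ~CudnnHandleHolder() { Clear(); }
};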
Example #10
CudnnNdConvolutionLayer<Dtype>::~CudnnNdConvolutionLayer() {
  // Check that handles have been setup before destroying.
  if (!handles_setup_) {
    return;
  }

  for (int i = 0; i < bottom_descs_.size(); i++) {
    cudnnDestroyTensorDescriptor(bottom_descs_[i]);
    cudnnDestroyTensorDescriptor(top_descs_[i]);
    cudnnDestroyConvolutionDescriptor(conv_descs_[i]);
  }
  if (this->bias_term_) {
    cudnnDestroyTensorDescriptor(bias_desc_);
  }
  cudnnDestroyFilterDescriptor(filter_desc_);

  for (int g = 0; g < this->group_ * CUDNN_STREAMS_PER_GROUP; g++) {
    cudaStreamDestroy(stream_[g]);
    cudnnDestroy(handle_[g]);
  }

  delete [] stream_;
  delete [] handle_;
}
Example #11
CuDNNHandle::~CuDNNHandle() {
  CUDNN_CHECK(cudnnDestroy(handle_));
}
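CuDNNHandle looks like a thin RAII wrapper whose constructor presumably calls cudnnCreate; the destructor above then releases the handle exactly once. A minimal sketch of such a wrapper, with an illustrative error-checking macro (the real CUDNN_CHECK is project-specific):

#include <cudnn.h>
#include <cstdio>
#include <cstdlib>

// Illustrative macro; projects define their own CUDNN_CHECK / CUDNN_CALL variants.
#define CHECK_CUDNN(expr)                                          \
  do {                                                             \
    cudnnStatus_t status = (expr);                                 \
    if (status != CUDNN_STATUS_SUCCESS) {                          \
      std::fprintf(stderr, "%s failed: %s\n", #expr,               \
                   cudnnGetErrorString(status));                   \
      std::abort();                                                \
    }                                                              \
  } while (0)

// RAII sketch: the handle lives exactly as long as the wrapper object.
class CudnnHandleWrapper {
 public:
  CudnnHandleWrapper() { CHECK_CUDNN(cudnnCreate(&handle_)); }
  ~CudnnHandleWrapper() { CHECK_CUDNN(cudnnDestroy(handle_)); }

  // Non-copyable: two owners would double-destroy the handle.
  CudnnHandleWrapper(const CudnnHandleWrapper&) = delete;
  CudnnHandleWrapper& operator=(const CudnnHandleWrapper&) = delete;

  cudnnHandle_t get() const { return handle_; }

 private:
  cudnnHandle_t handle_;
};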
Example #12
virtual ~CuDNNPoolingLayer(void) {
  CUDA_CHECK(cudnnDestroyTensorDescriptor(in_desc_));
  CUDA_CHECK(cudnnDestroyTensorDescriptor(out_desc_));
  CUDA_CHECK(cudnnDestroyPoolingDescriptor(pooling_desc_));
  CUDA_CHECK(cudnnDestroy(handle_));
}
Example #13
SaberStatus VenderConv2DActPooling<NV, AK_FLOAT, AK_FLOAT, AK_FLOAT, NCHW, NCHW, NCHW>::\
    create(const std::vector<DataTensor_in *>& inputs,
            std::vector<DataTensor_out *>& outputs,
            ConvActivePoolingParam<OpTensor>& param, Context<NV> &ctx) {

    if (!(ctx == this->_ctx)) {
        if (_handle != NULL) {
            CUDNN_CHECK(cudnnDestroy(_handle));
        }
        this->_ctx = ctx;

        cudaStream_t cuda_stream;
        cuda_stream = ctx.get_compute_stream();

        CUDNN_CHECK(cudnnCreate(&_handle));
        CUDNN_CHECK(cudnnSetStream(_handle, cuda_stream));
    }

    int input_num = inputs[0]->num();
    int input_channel = inputs[0]->channel();
    int input_height = inputs[0]->height();
    int input_width = inputs[0]->width();
    int output_channel = outputs[0]->channel();
    int output_height = outputs[0]->height();
    int output_width = outputs[0]->width();
    {
        _inner_shape = inputs[0]->shape();
        _inner_shape[0] = input_num;
        _inner_shape[1] = param.conv_param.weight()->num();

        int kernel_exten = param.conv_param.dilation_h *
                           (param.conv_param.weight()->height() - 1) + 1;
        int output_dim = (input_height + 2 * param.conv_param.pad_h - kernel_exten)
                         / param.conv_param.stride_h + 1;
        _inner_shape[2] = output_dim;
        kernel_exten = param.conv_param.dilation_w *
                           (param.conv_param.weight()->width() - 1) + 1;
        output_dim = (input_width + 2 * param.conv_param.pad_w - kernel_exten)
                         / param.conv_param.stride_w + 1;
        _inner_shape[3] = output_dim;
        _inner_tensor.re_alloc(_inner_shape);
    }

    int kernel_h = param.conv_param.weight()->height();
    int kernel_w = param.conv_param.weight()->width();

    int filter_dim_a[] = {output_channel,
                          input_channel / param.conv_param.group,
                          kernel_h, kernel_w};

    cudnn::setNDFilterDesc<OpDataType>(&_filter_desc,
                                    param.conv_param.weight()->dims(),
                                    filter_dim_a, CUDNN_TENSOR_NCHW);

    Shape in_stride = inputs[0]->get_stride();
    Shape inner_stride = _inner_tensor.get_stride();
    Shape out_stride = outputs[0]->get_stride();

    int dim_a[] = {input_num, input_channel,
                   input_height, input_width};

    int dim_inner[] = {_inner_shape[0], _inner_shape[1],
                        _inner_shape[2], _inner_shape[3]};

    int dim_b[] = {input_num, output_channel,
                   output_height, output_width};
    cudnn::setTensorNdDesc<InDataType >(&_input_descs,
                                       inputs[0]->dims(), dim_a, &in_stride[0]);
    cudnn::setTensorNdDesc<InDataType >(&_inner_descs,
                                       4, dim_inner,
                                       &inner_stride[0]);
    cudnn::setTensorNdDesc<InDataType>(&_output_descs,
                                      outputs[0]->dims(), dim_b, &out_stride[0]);
    int pad_a[] = {param.conv_param.pad_h, param.conv_param.pad_w};
    int filter_stride_a[] = {param.conv_param.stride_h, param.conv_param.stride_w};
    int dilation_a[] = {param.conv_param.dilation_h, param.conv_param.dilation_w};

    cudnn::setConvolutionNdDesc<OpDataType >(&_conv_descs,
                                          inputs[0]->dims() - 2, pad_a,
                                          filter_stride_a, dilation_a);
    // set activation descriptor
    if (param.has_activation) {
        cudnn::set_activation_des<OpDataType>(&_active_descs, param.activation_param.active);
    }
    if (param.has_pooling) {
        int windowHeight[] = {param.pooling_param.window_h,
                              param.pooling_param.window_w};
        int padding[] = {param.pooling_param.pad_h,
                         param.pooling_param.pad_w};
        int stride[] = {param.pooling_param.stride_h,
                        param.pooling_param.stride_w};

        cudnn::set_nd_pooling_des<OpDataType >(&_pooling_descs,
                                            param.pooling_param.pooling_type,
                                            _inner_tensor.dims() - 2,
                                            windowHeight,
                                            padding, stride);
    }
    // true: use tensor core
    // false: disable tensor core
    cudnn::set_math_type<OpDataType>(&_conv_descs, _use_tensor_core);
    cudnn::set_group_count<OpDataType>(&_conv_descs, param.conv_param.group);

    // Get the fastest cuDNN implementation:
    // set up the algorithm and the workspace size
    if (param.conv_param.group == inputs[0]->channel() && \
        inputs[0]->channel() == outputs[0]->channel()) {
        _fwd_algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
    } else {
        CUDNN_CHECK(cudnnGetConvolutionForwardAlgorithm(_handle, \
            _input_descs, _filter_desc, _conv_descs, _inner_descs, \
            _preference, _workspace_limit_bytes, &_fwd_algo));
    }

    CUDNN_CHECK(cudnnGetConvolutionForwardWorkspaceSize(_handle,
                                                        _input_descs, _filter_desc,
                                                        _conv_descs, _inner_descs,
                                                        _fwd_algo, &_workspace_fwd_sizes));

    if (_workspace_fwd_sizes > _workspaceSizeInBytes) {
        _workspaceSizeInBytes = _workspace_fwd_sizes;
        if (_workspaceData != NULL) {
            cudaFree(_workspaceData);
        }
        cudaMalloc(&_workspaceData, _workspaceSizeInBytes);
        _workspace = reinterpret_cast<char*>(_workspaceData);
    }

    if (param.conv_param.bias()->size() > 0) {
        int dim_bias[] = {1, output_channel, 1, 1};
        int stride_bias[] = {output_channel, 1, 1, 1};

        cudnn::setTensorNdDesc<OpDataType >(&_bias_desc,
                                         4, dim_bias, stride_bias);
    }
    return SaberSuccess;
}
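Besides re-creating the handle and descriptors, create() uses a grow-only workspace: it queries the size the chosen forward algorithm needs and reallocates only when the current buffer is too small. A stand-alone sketch of that pattern, with error handling added and all descriptor/handle arguments assumed to be already configured (the EnsureWorkspace name is illustrative):

#include <cudnn.h>
#include <cuda_runtime.h>
#include <cstddef>

// Grow-only workspace: query the bytes needed by the selected forward algorithm
// and reallocate only when the existing buffer is smaller than that.
inline cudaError_t EnsureWorkspace(cudnnHandle_t handle,
                                   cudnnTensorDescriptor_t input_desc,
                                   cudnnFilterDescriptor_t filter_desc,
                                   cudnnConvolutionDescriptor_t conv_desc,
                                   cudnnTensorDescriptor_t output_desc,
                                   cudnnConvolutionFwdAlgo_t algo,
                                   void** workspace,          // in/out: device buffer
                                   size_t* workspace_bytes) { // in/out: buffer capacity
  size_t needed = 0;
  if (cudnnGetConvolutionForwardWorkspaceSize(handle, input_desc, filter_desc,
                                              conv_desc, output_desc, algo,
                                              &needed) != CUDNN_STATUS_SUCCESS) {
    return cudaErrorUnknown;
  }
  if (needed > *workspace_bytes) {
    if (*workspace != nullptr) {
      cudaFree(*workspace);
      *workspace = nullptr;
    }
    cudaError_t err = cudaMalloc(workspace, needed);
    if (err != cudaSuccess) {
      *workspace_bytes = 0;
      return err;  // unlike the example above, surface allocation failures
    }
    *workspace_bytes = needed;
  }
  return cudaSuccess;
}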
Example #14
CuDNNTanHLayer<Dtype>::~CuDNNTanHLayer() {
  cudnnDestroyTensor4dDescriptor(this->bottom_desc_);
  cudnnDestroyTensor4dDescriptor(this->top_desc_);
  cudnnDestroy(this->handle_);
}
Example #15
~CUDNN() {
  CUDNN_CHECK(cudnnDestroy(handle_));
}