CuDNNConvolutionLayer<Dtype>::~CuDNNConvolutionLayer() {
  // Check that handles have been setup before destroying.
  if (!handles_setup_) { return; }

  for (int_tp i = 0; i < bottom_descs_.size(); i++) {
    cudnnDestroyTensorDescriptor(bottom_descs_[i]);
    cudnnDestroyTensorDescriptor(top_descs_[i]);
    cudnnDestroyConvolutionDescriptor(conv_descs_[i]);
  }
  if (this->bias_term_) {
    cudnnDestroyTensorDescriptor(bias_desc_);
  }
  cudnnDestroyFilterDescriptor(filter_desc_);

  for (int_tp g = 0; g < this->group_ * CUDNN_STREAMS_PER_GROUP; g++) {
    cudaStreamDestroy(stream_[g]);
    cudnnDestroy(handle_[g]);
  }

  cudaFree(workspaceData);
  delete [] stream_;
  delete [] handle_;
  delete [] fwd_algo_;
  delete [] bwd_filter_algo_;
  delete [] bwd_data_algo_;
  delete [] workspace_fwd_sizes_;
  delete [] workspace_bwd_data_sizes_;
  delete [] workspace_bwd_filter_sizes_;
}
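// Note: every Caffe-style destructor in this section pairs with a LayerSetUp
// that creates the handles and then sets handles_setup_. The skeleton below is
// an illustrative sketch of that pairing, not code from any project above;
// the class and its members are hypothetical.
#include <cudnn.h>

class CudnnLayerSketch {
 public:
  void LayerSetUp() {
    cudnnCreate(&handle_);
    cudnnCreateTensorDescriptor(&bottom_desc_);
    cudnnCreateTensorDescriptor(&top_desc_);
    handles_setup_ = true;  // teardown is only safe from this point on
  }
  ~CudnnLayerSketch() {
    // If setup never ran, the members below are uninitialized and destroying
    // them would be undefined behavior, hence the early return.
    if (!handles_setup_) { return; }
    cudnnDestroyTensorDescriptor(bottom_desc_);
    cudnnDestroyTensorDescriptor(top_desc_);
    cudnnDestroy(handle_);
  }

 private:
  bool handles_setup_ = false;
  cudnnHandle_t handle_;
  cudnnTensorDescriptor_t bottom_desc_, top_desc_;
};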
CuDNNReLULayer<Dtype>::~CuDNNReLULayer() {
  // Check that handles have been setup before destroying.
  if (!handles_setup_) { return; }

  cudnnDestroyTensorDescriptor(this->bottom_desc_);
  cudnnDestroyTensorDescriptor(this->top_desc_);
  cudnnDestroy(this->handle_);
}
CuDNNSoftmaxLayer<Dtype, MItype, MOtype>::~CuDNNSoftmaxLayer() {
  // Check that handles have been setup before destroying.
  if (!handles_setup_) { return; }

  cudnnDestroyTensorDescriptor(bottom_desc_);
  cudnnDestroyTensorDescriptor(top_desc_);
  cudnnDestroy(handle_);
}
GpuDevice::Impl::~Impl() {
  ActivateDevice();
  for (size_t i = 0; i < kParallelism; ++i) {
    CUDNN_CALL(cudnnDestroy(cudnn_handle[i]));
    CUBLAS_CALL(cublasDestroy(cublas_handle[i]));
    CUDA_CALL(cudaStreamDestroy(stream[i]));
  }
}
CuDNNPoolingLayer<Dtype>::~CuDNNPoolingLayer() {
  // Check that handles have been setup before destroying.
  if (!handles_setup_) { return; }

  cudnnDestroyTensor4dDescriptor(bottom_desc_);
  cudnnDestroyTensor4dDescriptor(top_desc_);
  cudnnDestroyPoolingDescriptor(pooling_desc_);
  cudnnDestroy(handle_);
}
GpuDevice::~GpuDevice() {
  CUDA_CALL(cudaSetDevice(device_));
  // Drain all queued work before destroying the streams it runs on.
  pool_.WaitForAllFinished();
  for (size_t i = 0; i < kParallelism; ++i) {
    CUDNN_CALL(cudnnDestroy(cudnn_handle_[i]));
    CUBLAS_CALL(cublasDestroy(cublas_handle_[i]));
    CUDA_CALL(cudaStreamDestroy(stream_[i]));
  }
  delete data_store_;
}
CuDNNTanHLayer<Dtype>::~CuDNNTanHLayer() {
  // Check that handles have been setup before destroying.
  if (!handles_setup_) { return; }

  cudnnDestroyTensorDescriptor(this->bottom_desc_);
  cudnnDestroyTensorDescriptor(this->top_desc_);
#if CUDNN_VERSION_MIN(5, 0, 0)
  cudnnDestroyActivationDescriptor(this->activation_desc_);
#endif
  cudnnDestroy(this->handle_);
}
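// Note: the CUDNN_VERSION_MIN(5, 0, 0) guard above exists because activation
// descriptors were only introduced in cuDNN 5, so on older versions there is
// nothing to destroy. Caffe defines the version-check macro (in
// include/caffe/util/cudnn.hpp) roughly as:
#define CUDNN_VERSION_MIN(major, minor, patch) \
    (CUDNN_VERSION >= (major * 1000 + minor * 100 + patch))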
CuDNNConvolutionLayer<Dtype>::~CuDNNConvolutionLayer() {
  for (int i = 0; i < bottom_descs_.size(); i++) {
    cudnnDestroyTensor4dDescriptor(bottom_descs_[i]);
    cudnnDestroyTensor4dDescriptor(top_descs_[i]);
    cudnnDestroyConvolutionDescriptor(conv_descs_[i]);
  }
  if (this->bias_term_) {
    cudnnDestroyTensor4dDescriptor(bias_desc_);
  }
  cudnnDestroyFilterDescriptor(filter_desc_);

  for (int g = 0; g < this->group_ * CUDNN_STREAMS_PER_GROUP; g++) {
    cudaStreamDestroy(stream_[g]);
    cudnnDestroy(handle_[g]);
  }

  delete [] stream_;
  delete [] handle_;
}
void Context::Clear() {
#if defined(USE_CUDA)
  if (blas_handle_ != nullptr) {
    CUBLAS_CHECK(cublasDestroy(cublasHandle_t(blas_handle_)));
    blas_handle_ = nullptr;
  }
#endif
#if defined(USE_CUDNN)
  if (cudnn_handle_ != nullptr) {
    CUDNN_CHECK(cudnnDestroy(cudnnHandle_t(cudnn_handle_)));
    cudnn_handle_ = nullptr;
  }
#endif
#if defined(USE_NNPACK)
  if (nnpack_handle_ != nullptr) {
    CHECK_EQ(nnp_deinitialize(), nnp_status_success);
    pthreadpool_destroy(pthreadpool_t(nnpack_handle_));
    nnpack_handle_ = nullptr;
  }
#endif
}
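// Note: the null-check-then-reset pattern in Context::Clear() makes teardown
// idempotent, so it can be called explicitly and again from a destructor
// without double-freeing. A minimal standalone sketch of the same idiom
// (the holder type is hypothetical):
#include <cudnn.h>

struct CudnnContextSketch {
  cudnnHandle_t handle = nullptr;

  void Release() {
    if (handle != nullptr) {   // skip if already released
      cudnnDestroy(handle);    // return status ignored in this sketch
      handle = nullptr;        // a second Release() becomes a no-op
    }
  }
  ~CudnnContextSketch() { Release(); }  // safe even after an explicit Release()
};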
CudnnNdConvolutionLayer<Dtype>::~CudnnNdConvolutionLayer() {
  // Check that handles have been setup before destroying.
  if (!handles_setup_) { return; }

  for (int i = 0; i < bottom_descs_.size(); i++) {
    cudnnDestroyTensorDescriptor(bottom_descs_[i]);
    cudnnDestroyTensorDescriptor(top_descs_[i]);
    cudnnDestroyConvolutionDescriptor(conv_descs_[i]);
  }
  if (this->bias_term_) {
    cudnnDestroyTensorDescriptor(bias_desc_);
  }
  cudnnDestroyFilterDescriptor(filter_desc_);

  for (int g = 0; g < this->group_ * CUDNN_STREAMS_PER_GROUP; g++) {
    cudaStreamDestroy(stream_[g]);
    cudnnDestroy(handle_[g]);
  }

  delete [] stream_;
  delete [] handle_;
}
CuDNNHandle::~CuDNNHandle() { CUDNN_CHECK(cudnnDestroy(handle_)); }
virtual ~CuDNNPoolingLayer(void) {
  CUDA_CHECK(cudnnDestroyTensorDescriptor(in_desc_));
  CUDA_CHECK(cudnnDestroyTensorDescriptor(out_desc_));
  CUDA_CHECK(cudnnDestroyPoolingDescriptor(pooling_desc_));
  CUDA_CHECK(cudnnDestroy(handle_));
}
SaberStatus VenderConv2DActPooling<NV, AK_FLOAT, AK_FLOAT, AK_FLOAT, NCHW, NCHW, NCHW>::
    create(const std::vector<DataTensor_in *>& inputs,
           std::vector<DataTensor_out *>& outputs,
           ConvActivePoolingParam<OpTensor>& param, Context<NV>& ctx) {
  if (!(ctx == this->_ctx)) {
    // The handle is bound to the context's compute stream, so rebuild it
    // whenever the context changes.
    if (_handle != NULL) {
      CUDNN_CHECK(cudnnDestroy(_handle));
    }
    this->_ctx = ctx;
    cudaStream_t cuda_stream = ctx.get_compute_stream();
    CUDNN_CHECK(cudnnCreate(&_handle));
    CUDNN_CHECK(cudnnSetStream(_handle, cuda_stream));
  }

  int input_num = inputs[0]->num();
  int input_channel = inputs[0]->channel();
  int input_height = inputs[0]->height();
  int input_width = inputs[0]->width();
  int output_channel = outputs[0]->channel();
  int output_height = outputs[0]->height();
  int output_width = outputs[0]->width();

  {
    // Shape of the intermediate (post-convolution, pre-pooling) tensor.
    _inner_shape = inputs[0]->shape();
    _inner_shape[0] = input_num;
    _inner_shape[1] = param.conv_param.weight()->num();
    int kernel_exten = param.conv_param.dilation_h
                       * (param.conv_param.weight()->height() - 1) + 1;
    int output_dim = (input_height + 2 * param.conv_param.pad_h - kernel_exten)
                     / param.conv_param.stride_h + 1;
    _inner_shape[2] = output_dim;
    kernel_exten = param.conv_param.dilation_w
                   * (param.conv_param.weight()->width() - 1) + 1;
    output_dim = (input_width + 2 * param.conv_param.pad_w - kernel_exten)
                 / param.conv_param.stride_w + 1;
    _inner_shape[3] = output_dim;
    _inner_tensor.re_alloc(_inner_shape);
  }

  int kernel_h = param.conv_param.weight()->height();
  int kernel_w = param.conv_param.weight()->width();
  int filter_dim_a[] = {output_channel,
                        input_channel / param.conv_param.group,
                        kernel_h, kernel_w};
  cudnn::setNDFilterDesc<OpDataType>(&_filter_desc,
                                     param.conv_param.weight()->dims(),
                                     filter_dim_a, CUDNN_TENSOR_NCHW);

  Shape in_stride = inputs[0]->get_stride();
  Shape inner_stride = _inner_tensor.get_stride();
  Shape out_stride = outputs[0]->get_stride();

  int dim_a[] = {input_num, input_channel, input_height, input_width};
  int dim_inner[] = {_inner_shape[0], _inner_shape[1],
                     _inner_shape[2], _inner_shape[3]};
  int dim_b[] = {input_num, output_channel, output_height, output_width};

  cudnn::setTensorNdDesc<InDataType>(&_input_descs, inputs[0]->dims(),
                                     dim_a, &in_stride[0]);
  cudnn::setTensorNdDesc<InDataType>(&_inner_descs, 4, dim_inner,
                                     &inner_stride[0]);
  cudnn::setTensorNdDesc<InDataType>(&_output_descs, outputs[0]->dims(),
                                     dim_b, &out_stride[0]);

  int pad_a[] = {param.conv_param.pad_h, param.conv_param.pad_w};
  int filter_stride_a[] = {param.conv_param.stride_h, param.conv_param.stride_w};
  int dilation_a[] = {param.conv_param.dilation_h, param.conv_param.dilation_w};
  cudnn::setConvolutionNdDesc<OpDataType>(&_conv_descs, inputs[0]->dims() - 2,
                                          pad_a, filter_stride_a, dilation_a);

  // Set activation descriptor.
  if (param.has_activation) {
    cudnn::set_activation_des<OpDataType>(&_active_descs,
                                          param.activation_param.active);
  }

  if (param.has_pooling) {
    int window[] = {param.pooling_param.window_h, param.pooling_param.window_w};
    int padding[] = {param.pooling_param.pad_h, param.pooling_param.pad_w};
    int stride[] = {param.pooling_param.stride_h, param.pooling_param.stride_w};
    cudnn::set_nd_pooling_des<OpDataType>(&_pooling_descs,
                                          param.pooling_param.pooling_type,
                                          _inner_tensor.dims() - 2,
                                          window, padding, stride);
  }

  // true: use tensor core; false: disable tensor core.
  cudnn::set_math_type<OpDataType>(&_conv_descs, _use_tensor_core);
  cudnn::set_group_count<OpDataType>(&_conv_descs, param.conv_param.group);

  // Get the fastest cuDNN implementation: set up algo and workspace size.
  // The depthwise case (group == in channels == out channels) hard-codes
  // IMPLICIT_GEMM; otherwise query cuDNN for the preferred algorithm.
  if (param.conv_param.group == inputs[0]->channel() &&
      inputs[0]->channel() == outputs[0]->channel()) {
    _fwd_algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
  } else {
    CUDNN_CHECK(cudnnGetConvolutionForwardAlgorithm(
        _handle, _input_descs, _filter_desc, _conv_descs, _inner_descs,
        _preference, _workspace_limit_bytes, &_fwd_algo));
  }
  CUDNN_CHECK(cudnnGetConvolutionForwardWorkspaceSize(
      _handle, _input_descs, _filter_desc, _conv_descs, _inner_descs,
      _fwd_algo, &_workspace_fwd_sizes));

  // Grow the shared workspace buffer if the chosen algorithm needs more.
  if (_workspace_fwd_sizes > _workspaceSizeInBytes) {
    _workspaceSizeInBytes = _workspace_fwd_sizes;
    if (_workspaceData != NULL) {
      cudaFree(_workspaceData);
    }
    cudaMalloc(&_workspaceData, _workspaceSizeInBytes);
    _workspace = reinterpret_cast<char*>(_workspaceData);
  }

  if (param.conv_param.bias()->size() > 0) {
    int dim_bias[] = {1, output_channel, 1, 1};
    int stride_bias[] = {output_channel, 1, 1, 1};
    cudnn::setTensorNdDesc<OpDataType>(&_bias_desc, 4, dim_bias, stride_bias);
  }
  return SaberSuccess;
}
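// Note: the workspace resize above follows a grow-only pattern: the buffer is
// freed and reallocated only when a larger size is requested, so repeated
// create() calls with unchanged shapes cause no cudaMalloc traffic. A
// standalone sketch of the idiom (the class and its members are hypothetical):
#include <cuda_runtime.h>
#include <cstddef>

class WorkspaceSketch {
 public:
  void* ensure(size_t bytes) {
    if (bytes > capacity_) {
      if (data_ != nullptr) {
        cudaFree(data_);       // release the smaller buffer first
      }
      cudaMalloc(&data_, bytes);
      capacity_ = bytes;
    }
    return data_;
  }
  ~WorkspaceSketch() {
    if (data_ != nullptr) { cudaFree(data_); }
  }

 private:
  void* data_ = nullptr;
  size_t capacity_ = 0;
};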
CuDNNTanHLayer<Dtype>::~CuDNNTanHLayer() {
  cudnnDestroyTensor4dDescriptor(this->bottom_desc_);
  cudnnDestroyTensor4dDescriptor(this->top_desc_);
  cudnnDestroy(this->handle_);
}
~CUDNN() { CUDNN_CHECK(cudnnDestroy(handle_)); }
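// Note: CuDNNHandle and CUDNN above are RAII wrappers: the handle's lifetime
// is tied to the wrapper object. A fuller hypothetical sketch also deletes
// copying so the same handle cannot be destroyed twice:
#include <cudnn.h>

class CudnnHandleSketch {
 public:
  CudnnHandleSketch() { cudnnCreate(&handle_); }
  ~CudnnHandleSketch() { cudnnDestroy(handle_); }

  // Non-copyable: a copy would lead to a double cudnnDestroy.
  CudnnHandleSketch(const CudnnHandleSketch&) = delete;
  CudnnHandleSketch& operator=(const CudnnHandleSketch&) = delete;

  cudnnHandle_t get() const { return handle_; }

 private:
  cudnnHandle_t handle_;
};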