CuDNNConvolutionLayer<Dtype>::~CuDNNConvolutionLayer() {
  // Check that handles have been setup before destroying.
  if (!handles_setup_) { return; }

  for (int_tp i = 0; i < bottom_descs_.size(); i++) {
    cudnnDestroyTensorDescriptor(bottom_descs_[i]);
    cudnnDestroyTensorDescriptor(top_descs_[i]);
    cudnnDestroyConvolutionDescriptor(conv_descs_[i]);
  }
  if (this->bias_term_) {
    cudnnDestroyTensorDescriptor(bias_desc_);
  }
  cudnnDestroyFilterDescriptor(filter_desc_);

  for (int_tp g = 0; g < this->group_ * CUDNN_STREAMS_PER_GROUP; g++) {
    cudaStreamDestroy(stream_[g]);
    cudnnDestroy(handle_[g]);
  }

  cudaFree(workspaceData);
  delete [] stream_;
  delete [] handle_;
  delete [] fwd_algo_;
  delete [] bwd_filter_algo_;
  delete [] bwd_data_algo_;
  delete [] workspace_fwd_sizes_;
  delete [] workspace_bwd_data_sizes_;
  delete [] workspace_bwd_filter_sizes_;
}
CuDNNSoftmaxLayer<Dtype>::~CuDNNSoftmaxLayer() {
  // Check that handles have been setup before destroying.
  if (!handles_setup_) { return; }

  cudnnDestroyTensorDescriptor(bottom_desc_);
  cudnnDestroyTensorDescriptor(top_desc_);
}
THFloatTensor *cudnn_SpatialMaxPooling_updateOutput(struct module *module, THFloatTensor *input)
{
	int kW = module->SpatialMaxPooling.kW;
	int kH = module->SpatialMaxPooling.kH;
	int dW = module->SpatialMaxPooling.dW;
	int dH = module->SpatialMaxPooling.dH;
	int padW = module->SpatialMaxPooling.padW;
	int padH = module->SpatialMaxPooling.padH;
	THFloatTensor *output = module->output;
	cudnnTensorDescriptor_t dinput, doutput;
	cudnnPoolingDescriptor_t dpool;
	float one = 1, zero = 0;
	int sizes[4];

	// Describe the input and build a max-pooling descriptor from the module parameters.
	errcheck(THcudnn_TensorDescriptor(&dinput, input));
	errcheck(cudnnCreatePoolingDescriptor(&dpool));
	errcheck(cudnnSetPooling2dDescriptor(dpool, CUDNN_POOLING_MAX, kH, kW, padH, padW, dH, dW));
	// Resize the output to the dimensions cuDNN computes for this pooling.
	errcheck(cudnnGetPoolingNdForwardOutputDim(dpool, dinput, 4, sizes));
	THCudaTensor_resize4d(output, sizes[0], sizes[1], sizes[2], sizes[3]);
	errcheck(THcudnn_TensorDescriptor(&doutput, output));
	errcheck(cudnnPoolingForward(THcudnn_getHandle(), dpool, &one, dinput, THFloatTensor_data(input),
		&zero, doutput, THFloatTensor_data(output)));
	cudnnDestroyTensorDescriptor(dinput);
	cudnnDestroyTensorDescriptor(doutput);
	cudnnDestroyPoolingDescriptor(dpool);
	return output;
}
CuDNNReLULayer<Dtype>::~CuDNNReLULayer() {
  // Check that handles have been setup before destroying.
  if (!handles_setup_) { return; }

  cudnnDestroyTensorDescriptor(this->bottom_desc_);
  cudnnDestroyTensorDescriptor(this->top_desc_);
  cudnnDestroy(this->handle_);
}
sparse_1x1_layer_tester_cuda::~sparse_1x1_layer_tester_cuda()
{
	cudnnDestroyTensorDescriptor(input_strided_data_desc);
	cudnnDestroyTensorDescriptor(input_converted_NHWC_data_desc);
	cudnnDestroyTensorDescriptor(input_converted_CNHW_data_desc);
	cudnnDestroyTensorDescriptor(output_data_desc);
	cudnnDestroyTensorDescriptor(bias_desc);
}
CuDNNSigmoidLayer<Dtype>::~CuDNNSigmoidLayer() {
  // Check that handles have been setup before destroying.
  if (!handles_setup_) { return; }

  cudnnDestroyActivationDescriptor(this->activ_desc_);
  cudnnDestroyTensorDescriptor(this->bottom_desc_);
  cudnnDestroyTensorDescriptor(this->top_desc_);
}
convolution_layer_updater_cuda::~convolution_layer_updater_cuda()
{
	cudnnDestroyTensorDescriptor(input_data_desc);
	cudnnDestroyTensorDescriptor(output_data_desc);
	cudnnDestroyFilterDescriptor(weights_desc);
	cudnnDestroyConvolutionDescriptor(convolution_desc);
	cudnnDestroyTensorDescriptor(bias_desc);
}
CuDNNPoolingLayer<Dtype>::~CuDNNPoolingLayer() {
  // check that handles have been setup before destroying
  if (!handles_setup_) { return; }

  cudnnDestroyTensorDescriptor(bottom_desc_);
  cudnnDestroyTensorDescriptor(top_desc_);
  cudnnDestroyPoolingDescriptor(pooling_desc_);
  cudnnDestroy(handle_);
}
CuDNNTanHLayer<Ftype, Btype>::~CuDNNTanHLayer() {
  // Check that handles have been setup before destroying.
  if (!handles_setup_) { return; }

  cudnnDestroyActivationDescriptor(this->activ_desc_);
  cudnnDestroyTensorDescriptor(fwd_bottom_desc_);
  cudnnDestroyTensorDescriptor(fwd_top_desc_);
  cudnnDestroyTensorDescriptor(bwd_bottom_desc_);
  cudnnDestroyTensorDescriptor(bwd_top_desc_);
}
CuDNNTanHLayer<Dtype>::~CuDNNTanHLayer() {
  // Check that handles have been setup before destroying.
  if (!handles_setup_) { return; }

  cudnnDestroyTensorDescriptor(this->bottom_desc_);
  cudnnDestroyTensorDescriptor(this->top_desc_);
#if CUDNN_VERSION_MIN(5, 0, 0)
  cudnnDestroyActivationDescriptor(this->activation_desc_);
#endif
  cudnnDestroy(this->handle_);
}
CuDNNConvolutionLayer<Dtype>::~CuDNNConvolutionLayer() {
  for (int i = 0; i < bottom_descs_.size(); i++) {
    cudnnDestroyTensorDescriptor(bottom_descs_[i]);
    cudnnDestroyTensorDescriptor(top_descs_[i]);
    cudnnDestroyConvolutionDescriptor(conv_descs_[i]);
  }
  if (this->bias_term_) {
    cudnnDestroyTensorDescriptor(bias_desc_);
  }
  cudnnDestroyFilterDescriptor(filter_desc_);

  for (int g = 0; g < this->group_ * CUDNN_STREAMS_PER_GROUP; g++) {
    cudaStreamDestroy(stream_[g]);
    cudnnDestroy(handle_[g]);
  }

  delete [] stream_;
  delete [] handle_;
}
THFloatTensor *cudnn_Threshold_updateOutput(struct module *module, THFloatTensor *input)
{
	THFloatTensor *output = module->output;
	cudnnTensorDescriptor_t dinput, doutput;
	int inplace = module->Threshold.inplace;
	float one = 1, zero = 0;

	errcheck(THcudnn_TensorDescriptor(&dinput, input));
	// In-place mode reuses the input storage for the output; otherwise allocate a matching output.
	if(inplace)
		THFloatTensor_set(output, input);
	else
		THCudaTensor_resize4d(output, input->size[0], input->size[1], input->size[2], input->size[3]);
	errcheck(THcudnn_TensorDescriptor(&doutput, output));
	errcheck(cudnnActivationForward(THcudnn_getHandle(), CUDNN_ACTIVATION_RELU, &one, dinput,
		THFloatTensor_data(input), &zero, doutput, THFloatTensor_data(output)));
	cudnnDestroyTensorDescriptor(dinput);
	cudnnDestroyTensorDescriptor(doutput);
	return output;
}
CuDNNConvolutionLayer<Dtype>::~CuDNNConvolutionLayer() {
  // Check that handles have been setup before destroying.
  if (!handles_setup_) { return; }

  for (int i = 0; i < bottom_descs_.size(); i++) {
    cudnnDestroyTensorDescriptor(bottom_descs_[i]);
    cudnnDestroyTensorDescriptor(top_descs_[i]);
    cudnnDestroyConvolutionDescriptor(conv_descs_[i]);
  }
  if (this->bias_term_) {
    cudnnDestroyTensorDescriptor(bias_desc_);
  }
  cudnnDestroyFilterDescriptor(filter_desc_);

  delete [] fwd_algo_;
  delete [] bwd_filter_algo_;
  delete [] bwd_data_algo_;
  delete [] workspace_fwd_sizes_;
  delete [] workspace_bwd_data_sizes_;
  delete [] workspace_bwd_filter_sizes_;
}
static int c_make_tensorNd(PyGpuArrayObject *var, cudnnTensorDescriptor_t *desc) {
  cudnnStatus_t err;
  err = cudnnCreateTensorDescriptor(desc);
  if (err != CUDNN_STATUS_SUCCESS) {
    PyErr_Format(PyExc_RuntimeError,
                 "Could not create tensor descriptor: %s",
                 cudnnGetErrorString(err));
    return -1;
  }
  if (c_set_tensorNd(var, *desc) != 0) {
    cudnnDestroyTensorDescriptor(*desc);
    return -1;
  }
  return 0;
}
CudnnNdConvolutionLayer<Dtype>::~CudnnNdConvolutionLayer() {
  // Check that handles have been setup before destroying.
  if (!handles_setup_) { return; }

  for (int i = 0; i < bottom_descs_.size(); i++) {
    cudnnDestroyTensorDescriptor(bottom_descs_[i]);
    cudnnDestroyTensorDescriptor(top_descs_[i]);
    cudnnDestroyConvolutionDescriptor(conv_descs_[i]);
  }
  if (this->bias_term_) {
    cudnnDestroyTensorDescriptor(bias_desc_);
  }
  cudnnDestroyFilterDescriptor(filter_desc_);

  for (int g = 0; g < this->group_ * CUDNN_STREAMS_PER_GROUP; g++) {
    cudaStreamDestroy(stream_[g]);
    cudnnDestroy(handle_[g]);
  }

  delete [] stream_;
  delete [] handle_;
}
activation_layer_cudnn_updater_cuda::~activation_layer_cudnn_updater_cuda()
{
	cudnnDestroyTensorDescriptor(input_data_desc);
	cudnnDestroyActivationDescriptor(activation_desc);
}
softmax_layer_tester_cuda::~softmax_layer_tester_cuda() { cudnnDestroyTensorDescriptor(input_data_desc); }
~CuDnnTensorDescriptor() { cudnnDestroyTensorDescriptor(m_tensorDesc); }
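// The one-line destructor above is the tail of a typical RAII wrapper around
// cudnnTensorDescriptor_t; the rest of that class is not shown in this collection.
// A minimal sketch of such a wrapper follows. The constructor, the deleted copy
// operations, and the get() accessor are assumptions for illustration, not the
// original code.
#include <cudnn.h>
#include <stdexcept>

class CuDnnTensorDescriptor {
 public:
  // Create the descriptor up front; throw if cuDNN refuses.
  CuDnnTensorDescriptor() {
    if (cudnnCreateTensorDescriptor(&m_tensorDesc) != CUDNN_STATUS_SUCCESS)
      throw std::runtime_error("cudnnCreateTensorDescriptor failed");
  }
  // Release the descriptor exactly once, when the wrapper goes out of scope.
  ~CuDnnTensorDescriptor() { cudnnDestroyTensorDescriptor(m_tensorDesc); }

  // Non-copyable so the underlying handle is never destroyed twice.
  CuDnnTensorDescriptor(const CuDnnTensorDescriptor &) = delete;
  CuDnnTensorDescriptor &operator=(const CuDnnTensorDescriptor &) = delete;

  cudnnTensorDescriptor_t get() const { return m_tensorDesc; }

 private:
  cudnnTensorDescriptor_t m_tensorDesc;
};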
int dnn_rnn_fwd(cudnnRNNDescriptor_t desc, PyGpuArrayObject *w,
                PyGpuArrayObject *x, PyGpuArrayObject *hx,
                PyGpuArrayObject *cx, gpudata **reserve,
                PyGpuArrayObject **y, PyGpuArrayObject **hy,
                PyGpuArrayObject **cy, cudnnHandle_t _handle) {
  PyGpuContextObject *c = x->context;
  cudnnTensorDescriptor_t xdesc = NULL;
  cudnnTensorDescriptor_t hxdesc = NULL;
  cudnnTensorDescriptor_t cxdesc = NULL;
  cudnnTensorDescriptor_t ydesc = NULL;
  cudnnTensorDescriptor_t hydesc = NULL;
  cudnnTensorDescriptor_t cydesc = NULL;
  cudnnFilterDescriptor_t wdesc = NULL;
  cudnnTensorDescriptor_t *xl = NULL;
  cudnnTensorDescriptor_t *yl = NULL;
  gpudata *workspace = NULL;
  size_t worksize, ressize;

  size_t seqLength = PyGpuArray_DIM(x, 0);
  size_t miniBatch = PyGpuArray_DIM(x, 1);
  size_t inputSize = PyGpuArray_DIM(x, 2);
  size_t hiddenSizeDir = PyGpuArray_DIM(hx, 2);
  size_t shape[3];
  int strs[3], dims[3];
  cudnnStatus_t err;
  cudnnDataType_t dt;

  int res = -1;

  switch (x->ga.typecode) {
  case GA_FLOAT:
    dt = CUDNN_DATA_FLOAT;
    break;
  case GA_DOUBLE:
    dt = CUDNN_DATA_DOUBLE;
    break;
  case GA_HALF:
    dt = CUDNN_DATA_HALF;
    break;
  default:
    PyErr_SetString(PyExc_TypeError, "Unsupported data type for x");
    return -1;
  }

  // This is early to match the exit() in the fail label.
  cuda_enter(c->ctx);

  err = cudnnCreateTensorDescriptor(&xdesc);
  if (err != CUDNN_STATUS_SUCCESS) {
    PyErr_Format(PyExc_RuntimeError, "Could not create xdesc: %s",
                 cudnnGetErrorString(err));
    goto fail;
  }

  dims[0] = PyGpuArray_DIM(x, 1);
  dims[1] = PyGpuArray_DIM(x, 2);
  dims[2] = 1;

  strs[0] = dims[1] * dims[2];
  strs[1] = dims[2];
  strs[2] = 1;

  err = cudnnSetTensorNdDescriptor(xdesc, dt, 3, dims, strs);
  if (err != CUDNN_STATUS_SUCCESS) {
    PyErr_Format(PyExc_RuntimeError, "Could not set xdesc: %s",
                 cudnnGetErrorString(err));
    goto fail;
  }

  if (c_make_tensorNd(hx, &hxdesc) != 0)
    goto fail;

  if (cx != NULL)
    if (c_make_tensorNd(cx, &cxdesc) != 0)
      goto fail;

  if (c_make_filter(w, &wdesc) != 0)
    goto fail;

  shape[0] = seqLength;
  shape[1] = miniBatch;
  shape[2] = hiddenSizeDir;
  if (theano_prep_output(y, 3, shape, x->ga.typecode, GA_C_ORDER, c) != 0)
    goto fail;

  err = cudnnCreateTensorDescriptor(&ydesc);
  if (err != CUDNN_STATUS_SUCCESS) {
    PyErr_Format(PyExc_RuntimeError, "Could not create ydesc: %s",
                 cudnnGetErrorString(err));
    goto fail;
  }

  dims[0] = shape[1];
  dims[1] = shape[2];
  dims[2] = 1;

  strs[0] = dims[2] * dims[1];
  strs[1] = dims[2];
  strs[2] = 1;

  err = cudnnSetTensorNdDescriptor(ydesc, dt, 3, dims, strs);
  if (err != CUDNN_STATUS_SUCCESS) {
    PyErr_Format(PyExc_RuntimeError, "Could not set ydesc: %s",
                 cudnnGetErrorString(err));
    goto fail;
  }

  if (theano_prep_output(hy, 3, PyGpuArray_DIMS(hx),
                         hx->ga.typecode, GA_C_ORDER, c) != 0)
    goto fail;

  if (c_make_tensorNd(*hy, &hydesc) != 0)
    goto fail;

  if (cy != NULL) {
    if (theano_prep_output(cy, 3, PyGpuArray_DIMS(cx),
                           cx->ga.typecode, GA_C_ORDER, c) != 0)
      goto fail;

    if (c_make_tensorNd(*cy, &cydesc) != 0)
      goto fail;
  }

  xl = (cudnnTensorDescriptor_t *)calloc(sizeof(cudnnTensorDescriptor_t), seqLength);
  if (xl == NULL) {
    PyErr_NoMemory();
    goto fail;
  }

  for (size_t i = 0; i < seqLength; i++)
    xl[i] = xdesc;

  yl = (cudnnTensorDescriptor_t *)calloc(sizeof(cudnnTensorDescriptor_t), seqLength);
  if (yl == NULL) {
    PyErr_NoMemory();
    goto fail;
  }

  for (size_t i = 0; i < seqLength; i++)
    yl[i] = ydesc;

  err = cudnnGetRNNWorkspaceSize(_handle, desc, (int)seqLength,
                                 xl, &worksize);
  if (err != CUDNN_STATUS_SUCCESS) {
    PyErr_Format(PyExc_RuntimeError, "Could not get worksize: %s",
                 cudnnGetErrorString(err));
    goto fail;
  }

  workspace = gpudata_alloc(c->ctx, worksize, NULL, 0, NULL);
  if (workspace == NULL) {
    PyErr_Format(PyExc_RuntimeError, "Could not allocate workspace");
    goto fail;
  }

  err = cudnnGetRNNTrainingReserveSize(_handle, desc, (int)seqLength,
                                       xl, &ressize);
  if (err != CUDNN_STATUS_SUCCESS) {
    PyErr_Format(PyExc_RuntimeError, "Could not get reserve size: %s",
                 cudnnGetErrorString(err));
    goto fail;
  }

  *reserve = gpudata_alloc(c->ctx, ressize, NULL, 0, NULL);
  if (*reserve == NULL) {
    PyErr_Format(PyExc_RuntimeError, "Could not allocate reserve");
    goto fail;
  }

  err = cudnnRNNForwardTraining(_handle, desc, (int)seqLength,
                                xl, PyGpuArray_DEV_DATA(x),
                                hxdesc, PyGpuArray_DEV_DATA(hx),
                                cxdesc, cx ? PyGpuArray_DEV_DATA(cx) : NULL,
                                wdesc, PyGpuArray_DEV_DATA(w),
                                yl, PyGpuArray_DEV_DATA(*y),
                                hydesc, PyGpuArray_DEV_DATA(*hy),
                                cydesc, cy ? PyGpuArray_DEV_DATA(*cy) : NULL,
                                *(void **)workspace, worksize,
                                *(void **)(*reserve), ressize);
  if (err != CUDNN_STATUS_SUCCESS) {
    PyErr_Format(PyExc_RuntimeError, "Could not run RNN: %s",
                 cudnnGetErrorString(err));
    goto fail;
  }

  res = 0;
 fail:
  if (xdesc != NULL)
    cudnnDestroyTensorDescriptor(xdesc);
  if (hxdesc != NULL)
    cudnnDestroyTensorDescriptor(hxdesc);
  if (cxdesc != NULL)
    cudnnDestroyTensorDescriptor(cxdesc);
  if (wdesc != NULL)
    cudnnDestroyFilterDescriptor(wdesc);
  if (ydesc != NULL)
    cudnnDestroyTensorDescriptor(ydesc);
  if (hydesc != NULL)
    cudnnDestroyTensorDescriptor(hydesc);
  if (cydesc != NULL)
    cudnnDestroyTensorDescriptor(cydesc);
  free(xl);
  free(yl);
  if (workspace != NULL)
    gpudata_release(workspace);
  cuda_exit(c->ctx);
  return res;
}
ActivationDown::~ActivationDown() {
  CUDNN_CHECK(cudnnDestroyTensorDescriptor(bottom_desc_));
  CUDNN_CHECK(cudnnDestroyTensorDescriptor(top_desc_));
}
SoftmaxDown::~SoftmaxDown() {
  CUDNN_CHECK(cudnnDestroyTensorDescriptor(bottom_desc_));
  CUDNN_CHECK(cudnnDestroyTensorDescriptor(top_desc_));
}
THFloatTensor *cudnn_SpatialConvolution_updateOutput(struct module *module, THFloatTensor *input)
{
	int kW = module->SpatialConvolution.kW;
	int kH = module->SpatialConvolution.kH;
	int dW = module->SpatialConvolution.dW;
	int dH = module->SpatialConvolution.dH;
	int padW = module->SpatialConvolution.padW;
	int padH = module->SpatialConvolution.padH;
	int nInputPlane = module->SpatialConvolution.nInputPlane;
	int nOutputPlane = module->SpatialConvolution.nOutputPlane;
	THFloatTensor *weight = module->SpatialConvolution.weight;
	THFloatTensor *bias = module->SpatialConvolution.bias;
	THFloatTensor *output = module->output;
	int sizes[4];
	int pad[2], filterStride[2], upscale[2];
	cudnnTensorDescriptor_t dinput, dbias, doutput;
	cudnnConvolutionDescriptor_t dconv;
	cudnnFilterDescriptor_t dweight;
	float one = 1, zero = 0;
	size_t reqwssize;
	static void *ws;
	static size_t wssize;
	static const int alg = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM;

	pad[0] = padH;
	pad[1] = padW;
	filterStride[0] = dH;
	filterStride[1] = dW;
	upscale[0] = 1;
	upscale[1] = 1;

	if(input->nDimension <= 2)
	{
		// Here we use the SpatialConvolution module to perform a linear transformation
		errcheck(cudnnCreateTensorDescriptor(&dinput));
		if(input->nDimension == 1)
			errcheck(cudnnSetTensor4dDescriptor(dinput, CUDNN_TENSOR_NCHW, floattype, 1, input->size[0], 1, 1));
		else
			errcheck(cudnnSetTensor4dDescriptor(dinput, CUDNN_TENSOR_NCHW, floattype, input->size[0], input->size[1], 1, 1));
	}
	else
		errcheck(THcudnn_TensorDescriptor(&dinput, input));

	errcheck(cudnnCreateFilterDescriptor(&dweight));
	errcheck(cudnnSetFilter4dDescriptor(dweight, floattype, nOutputPlane, nInputPlane, kH, kW));
	errcheck(cudnnCreateTensorDescriptor(&dbias));
	errcheck(cudnnSetTensor4dDescriptor(dbias, CUDNN_TENSOR_NCHW, floattype, 1, bias->size[0], 1, 1));
	errcheck(cudnnCreateConvolutionDescriptor(&dconv));
	errcheck(cudnnSetConvolutionNdDescriptor(dconv, 2, pad, filterStride, upscale, CUDNN_CROSS_CORRELATION, floattype));
	// Resize the output to the dimensions cuDNN computes for this convolution.
	errcheck(cudnnGetConvolutionNdForwardOutputDim(dconv, dinput, dweight, 4, sizes));
	THCudaTensor_resize4d(output, sizes[0], sizes[1], sizes[2], sizes[3]);
	errcheck(THcudnn_TensorDescriptor(&doutput, output));

	// Algorithms that need scratch space: grow the cached workspace if required.
	if(alg == CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM || alg == CUDNN_CONVOLUTION_FWD_ALGO_GEMM ||
		alg == CUDNN_CONVOLUTION_FWD_ALGO_FFT || alg == CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING)
	{
		errcheck(cudnnGetConvolutionForwardWorkspaceSize(THcudnn_getHandle(), dinput, dweight, dconv, doutput, alg, &reqwssize));
		if(reqwssize > wssize)
		{
			wssize = reqwssize;
			errcheck(cudaMalloc(&ws, reqwssize));
		}
	}

	errcheck(cudnnConvolutionForward(THcudnn_getHandle(), &one, dinput, THFloatTensor_data(input),
		dweight, THFloatTensor_data(weight), dconv, alg, ws, wssize, &zero,
		doutput, THFloatTensor_data(output)));
	// Add the bias to the convolution output.
	errcheck(cudnnAddTensor_v3(THcudnn_getHandle(), &one, dbias, THFloatTensor_data(bias),
		&one, doutput, THFloatTensor_data(output)));

	cudnnDestroyTensorDescriptor(dinput);
	cudnnDestroyFilterDescriptor(dweight);
	cudnnDestroyTensorDescriptor(dbias);
	cudnnDestroyTensorDescriptor(doutput);
	cudnnDestroyConvolutionDescriptor(dconv);
	return output;
}
virtual ~CuDNNPoolingLayer(void) {
  CUDA_CHECK(cudnnDestroyTensorDescriptor(in_desc_));
  CUDA_CHECK(cudnnDestroyTensorDescriptor(out_desc_));
  CUDA_CHECK(cudnnDestroyPoolingDescriptor(pooling_desc_));
  CUDA_CHECK(cudnnDestroy(handle_));
}
sparse_fully_connected_1x1_layer_tester_cuda::~sparse_fully_connected_1x1_layer_tester_cuda()
{
	cudnnDestroyTensorDescriptor(output_data_desc);
	cudnnDestroyTensorDescriptor(bias_desc);
}
PoolBC01CuDNN<T>::~PoolBC01CuDNN() {
  CUDNN_CHECK(cudnnDestroyTensorDescriptor(imgs_desc));
  CUDNN_CHECK(cudnnDestroyTensorDescriptor(poolout_desc));
  CUDNN_CHECK(cudnnDestroyPoolingDescriptor(pool_desc));
}
ConvBC01CuDNN<T>::~ConvBC01CuDNN() {
  CUDNN_CHECK(cudnnDestroyTensorDescriptor(imgs_desc));
  CUDNN_CHECK(cudnnDestroyTensorDescriptor(convout_desc));
  CUDNN_CHECK(cudnnDestroyFilterDescriptor(filters_desc));
  CUDNN_CHECK(cudnnDestroyConvolutionDescriptor(conv_desc));
}
fully_connected_layer_updater_cuda::~fully_connected_layer_updater_cuda()
{
	cudnnDestroyTensorDescriptor(output_data_desc);
	cudnnDestroyTensorDescriptor(bias_desc);
}
virtual ~Add2CudaCudnn() {
  NBLA_CUDNN_CHECK(cudnnDestroyTensorDescriptor(input_desc_));
  NBLA_CUDNN_CHECK(cudnnDestroyTensorDescriptor(output_desc_));
}
convolution_1x1_layer_tester_cuda::~convolution_1x1_layer_tester_cuda()
{
	cudnnDestroyTensorDescriptor(output_data_desc);
	cudnnDestroyTensorDescriptor(bias_desc);
}