// Fills an already-created cuDNN N-D convolution descriptor with the given
// padding and stride, using cross-correlation mode and a dilation ("upscale")
// factor of 1 on every spatial axis. The bottom/filter descriptors are part
// of the call signature for symmetry with sibling helpers but are not read
// here. Element type comes from the project's dataType<Dtype> trait.
inline void setConvolutionDesc(cudnnConvolutionDescriptor_t* conv,
                               cudnnTensorDescriptor_t bottom,
                               cudnnFilterDescriptor_t filter,
                               const int_tp num_spatial_dims,
                               const int_tp* pad, const int_tp* stride) {
  // cuDNN expects plain int arrays, so convert the int_tp inputs via the
  // vector range constructor instead of an explicit copy loop.
  std::vector<int> pad_data(pad, pad + num_spatial_dims);
  std::vector<int> stride_data(stride, stride + num_spatial_dims);
  std::vector<int> dilation_data(num_spatial_dims, 1);
  CUDNN_CHECK(cudnnSetConvolutionNdDescriptor(
      *conv, num_spatial_dims, &pad_data[0], &stride_data[0],
      &dilation_data[0], CUDNN_CROSS_CORRELATION, dataType<Dtype>::type));
}
int APPLY_SPECIFIC(conv_desc)(PyArrayObject *filt_shp, cudnnConvolutionDescriptor_t *desc) { cudnnStatus_t err; int pad[3] = {PAD_0, PAD_1, PAD_2}; int strides[3] = {SUB_0, SUB_1, SUB_2}; int upscale[3] = {1, 1, 1}; #if BORDER_MODE == 0 pad[0] = *(npy_int64 *)PyArray_GETPTR1(filt_shp, 2) - 1; pad[1] = *(npy_int64 *)PyArray_GETPTR1(filt_shp, 3) - 1; #if NB_DIMS > 2 pad[2] = *(npy_int64 *)PyArray_GETPTR1(filt_shp, 4) - 1; #endif #elif BORDER_MODE == 2 pad[0] = *(npy_int64 *)PyArray_GETPTR1(filt_shp, 2) / 2; pad[1] = *(npy_int64 *)PyArray_GETPTR1(filt_shp, 3) / 2; #if NB_DIMS > 2 pad[2] = *(npy_int64 *)PyArray_GETPTR1(filt_shp, 4) / 2; #endif #endif if (PyArray_DIM(filt_shp, 0) - 2 != NB_DIMS) { PyErr_Format(PyExc_ValueError, "Filter shape has too many dimensions: " "expected %d, got %lld.", NB_DIMS, (long long)PyArray_DIM(filt_shp, 0)); return -1; } err = cudnnCreateConvolutionDescriptor(desc); if (err != CUDNN_STATUS_SUCCESS) { PyErr_Format(PyExc_MemoryError, "could not allocate convolution " "descriptor: %s", cudnnGetErrorString(err)); return -1; } err = cudnnSetConvolutionNdDescriptor(*desc, NB_DIMS, pad, strides, upscale, CONV_MODE, PRECISION); return 0; }
/*
 * Forward pass of a 2-D convolution (plus bias add) through cuDNN.
 *
 * module: supplies kernel size (kW/kH), stride (dW/dH), padding
 *         (padW/padH), plane counts, and the weight/bias/output tensors.
 * input:  1-D or 2-D input is treated as a linear transformation
 *         (reshaped to NCHW with H = W = 1); otherwise a full NCHW
 *         tensor descriptor is built from it.
 *
 * Returns module->output, resized to the convolution output shape.
 *
 * NOTE(review): the cached workspace (ws/wssize) is function-static and
 * therefore not thread-safe — confirm single-threaded use by callers.
 */
THFloatTensor *cudnn_SpatialConvolution_updateOutput(struct module *module, THFloatTensor *input)
{
	int kW = module->SpatialConvolution.kW;
	int kH = module->SpatialConvolution.kH;
	int dW = module->SpatialConvolution.dW;
	int dH = module->SpatialConvolution.dH;
	int padW = module->SpatialConvolution.padW;
	int padH = module->SpatialConvolution.padH;
	int nInputPlane = module->SpatialConvolution.nInputPlane;
	int nOutputPlane = module->SpatialConvolution.nOutputPlane;
	THFloatTensor *weight = module->SpatialConvolution.weight;
	THFloatTensor *bias = module->SpatialConvolution.bias;
	THFloatTensor *output = module->output;
	int sizes[4];
	int pad[2], filterStride[2], upscale[2];
	cudnnTensorDescriptor_t dinput, dbias, doutput;
	cudnnConvolutionDescriptor_t dconv;
	cudnnFilterDescriptor_t dweight;
	float one = 1, zero = 0;
	size_t reqwssize;
	/* Workspace buffer is cached across calls and only grown, never shrunk. */
	static void *ws;
	static size_t wssize;
	static const int alg = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM;

	pad[0] = padH;
	pad[1] = padW;
	filterStride[0] = dH;
	filterStride[1] = dW;
	upscale[0] = 1;
	upscale[1] = 1;
	if(input->nDimension <= 2)
	{
		/* Here we use the SpatialConvolution module to perform a linear
		 * transformation: a 1-D/2-D input is viewed as NCHW with H = W = 1. */
		errcheck(cudnnCreateTensorDescriptor(&dinput));
		if(input->nDimension == 1)
			errcheck(cudnnSetTensor4dDescriptor(dinput, CUDNN_TENSOR_NCHW, floattype, 1, input->size[0], 1, 1));
		else errcheck(cudnnSetTensor4dDescriptor(dinput, CUDNN_TENSOR_NCHW, floattype, input->size[0], input->size[1], 1, 1));
	} else errcheck(THcudnn_TensorDescriptor(&dinput, input));
	errcheck(cudnnCreateFilterDescriptor(&dweight));
	errcheck(cudnnSetFilter4dDescriptor(dweight, floattype, nOutputPlane, nInputPlane, kH, kW));
	errcheck(cudnnCreateTensorDescriptor(&dbias));
	errcheck(cudnnSetTensor4dDescriptor(dbias, CUDNN_TENSOR_NCHW, floattype, 1, bias->size[0], 1, 1));
	errcheck(cudnnCreateConvolutionDescriptor(&dconv));
	errcheck(cudnnSetConvolutionNdDescriptor(dconv, 2, pad, filterStride, upscale, CUDNN_CROSS_CORRELATION, floattype));
	errcheck(cudnnGetConvolutionNdForwardOutputDim(dconv, dinput, dweight, 4, sizes));
	THCudaTensor_resize4d(output, sizes[0], sizes[1], sizes[2], sizes[3]);
	errcheck(THcudnn_TensorDescriptor(&doutput, output));
	if(alg == CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM || alg == CUDNN_CONVOLUTION_FWD_ALGO_GEMM ||
		alg == CUDNN_CONVOLUTION_FWD_ALGO_FFT || alg == CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING)
	{
		errcheck(cudnnGetConvolutionForwardWorkspaceSize(THcudnn_getHandle(), dinput, dweight, dconv, doutput, alg, &reqwssize));
		if(reqwssize > wssize)
		{
			/* Grow the cached workspace. Free the old buffer first so
			 * repeated growth does not leak device memory, and only record
			 * the new size after the allocation succeeds. */
			if(ws)
			{
				errcheck(cudaFree(ws));
				ws = 0;
				wssize = 0;
			}
			errcheck(cudaMalloc(&ws, reqwssize));
			wssize = reqwssize;
		}
	}
	errcheck(cudnnConvolutionForward(THcudnn_getHandle(), &one, dinput, THFloatTensor_data(input),
		dweight, THFloatTensor_data(weight), dconv, alg, ws, wssize, &zero, doutput, THFloatTensor_data(output)));
	/* Broadcast-add the bias (1 x C x 1 x 1) onto the output. */
	errcheck(cudnnAddTensor_v3(THcudnn_getHandle(), &one, dbias, THFloatTensor_data(bias),
		&one, doutput, THFloatTensor_data(output)));
	cudnnDestroyTensorDescriptor(dinput);
	cudnnDestroyFilterDescriptor(dweight);
	cudnnDestroyTensorDescriptor(dbias);
	cudnnDestroyTensorDescriptor(doutput);
	cudnnDestroyConvolutionDescriptor(dconv);
	return output;
}