THFloatTensor *cudnn_SpatialMaxPooling_updateOutput(struct module *module, THFloatTensor *input)
{
	int kW = module->SpatialMaxPooling.kW;
	int kH = module->SpatialMaxPooling.kH;
	int dW = module->SpatialMaxPooling.dW;
	int dH = module->SpatialMaxPooling.dH;
	int padW = module->SpatialMaxPooling.padW;
	int padH = module->SpatialMaxPooling.padH;

	THFloatTensor *output = module->output;
	cudnnTensorDescriptor_t dinput, doutput;
	cudnnPoolingDescriptor_t dpool;
	float one = 1, zero = 0;
	int sizes[4];

	// Describe the input and the pooling geometry, then ask cuDNN for the resulting output size
	errcheck(THcudnn_TensorDescriptor(&dinput, input));
	errcheck(cudnnCreatePoolingDescriptor(&dpool));
	errcheck(cudnnSetPooling2dDescriptor(dpool, CUDNN_POOLING_MAX, kH, kW, padH, padW, dH, dW));
	errcheck(cudnnGetPoolingNdForwardOutputDim(dpool, dinput, 4, sizes));
	THCudaTensor_resize4d(output, sizes[0], sizes[1], sizes[2], sizes[3]);
	errcheck(THcudnn_TensorDescriptor(&doutput, output));

	errcheck(cudnnPoolingForward(THcudnn_getHandle(), dpool, &one, dinput, THFloatTensor_data(input), &zero,
		doutput, THFloatTensor_data(output)));

	cudnnDestroyTensorDescriptor(dinput);
	cudnnDestroyTensorDescriptor(doutput);
	cudnnDestroyPoolingDescriptor(dpool);
	return output;
}
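To make the descriptor pattern above easier to follow in isolation, here is a minimal standalone sketch that only asks cuDNN for the pooled output size. The locally created handle, the 1x3x8x8 input and the 2x2/stride-2 window are illustrative assumptions; the 2d output-dim query is the simpler sibling of the Nd call used above, and the pooling-descriptor call uses the same cuDNN v4-era signature.

#include <cudnn.h>
#include <stdio.h>

/* Minimal sketch: ask cuDNN how large a max-pooled output will be.
   The handle and the hard-coded sizes are illustrative assumptions. */
int main(void)
{
	cudnnHandle_t handle;
	cudnnTensorDescriptor_t dinput;
	cudnnPoolingDescriptor_t dpool;
	int n, c, h, w;

	cudnnCreate(&handle);
	cudnnCreateTensorDescriptor(&dinput);
	/* 1 image, 3 planes, 8x8 pixels, NCHW float */
	cudnnSetTensor4dDescriptor(dinput, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, 3, 8, 8);
	cudnnCreatePoolingDescriptor(&dpool);
	/* 2x2 max pooling, no padding, stride 2 (same v4-era signature as above) */
	cudnnSetPooling2dDescriptor(dpool, CUDNN_POOLING_MAX, 2, 2, 0, 0, 2, 2);
	cudnnGetPooling2dForwardOutputDim(dpool, dinput, &n, &c, &h, &w);
	printf("pooled output: %dx%dx%dx%d\n", n, c, h, w);   /* expect 1x3x4x4 */

	cudnnDestroyPoolingDescriptor(dpool);
	cudnnDestroyTensorDescriptor(dinput);
	cudnnDestroy(handle);
	return 0;
}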
Example #2
THFloatTensor *cudnn_Threshold_updateOutput(struct module *module, THFloatTensor *input)
{
	THFloatTensor *output = module->output;
	cudnnTensorDescriptor_t dinput, doutput;
	int inplace = module->Threshold.inplace;
	float one = 1, zero = 0;

	errcheck(THcudnn_TensorDescriptor(&dinput, input));
	if(inplace)
		// In-place: output shares input's storage, so the activation result overwrites the input data
		THFloatTensor_set(output, input);
	else THCudaTensor_resize4d(output, input->size[0], input->size[1], input->size[2], input->size[3]);
	errcheck(THcudnn_TensorDescriptor(&doutput, output));

	errcheck(cudnnActivationForward(THcudnn_getHandle(), CUDNN_ACTIVATION_RELU, &one, dinput, THFloatTensor_data(input), &zero,
		doutput, THFloatTensor_data(output)));

	cudnnDestroyTensorDescriptor(dinput);
	cudnnDestroyTensorDescriptor(doutput);
	return output;
}
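The inplace branch works because THFloatTensor_set turns output into a view over input's storage, so the ReLU above is computed with source and destination aliased. A hypothetical fragment illustrating that aliasing, assuming the standard TH-style tensor API used throughout these examples:

#include <assert.h>

/* Hypothetical fragment, not thnets code: after THFloatTensor_set the two
   tensors share one data buffer, which is what allows the in-place path of
   cudnn_Threshold_updateOutput to write its result over the input. */
static void show_inplace_aliasing(THFloatTensor *input)
{
	THFloatTensor *output = THFloatTensor_new();
	THFloatTensor_set(output, input);   /* share storage, no copy */
	assert(THFloatTensor_data(output) == THFloatTensor_data(input));
}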
Example #3
void THCudaTensor_resize3d(THCState *state, THCudaTensor *tensor, long size0, long size1, long size2)
{
  THCudaTensor_resize4d(state, tensor, size0, size1, size2, -1);
}
Example #4
void THCudaTensor_resize1d(THCState *state, THCudaTensor *tensor, long size0)
{
  THCudaTensor_resize4d(state, tensor, size0, -1, -1, -1);
}
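Both wrappers above rely on the resize4d convention that sizing stops at the first non-positive entry, so the trailing -1 values simply drop those dimensions. A hedged sketch of the equivalence (the THCState handle and the size 8 are assumptions for illustration):

/* Hedged sketch: the two resize calls leave `t` as the same 1-D tensor,
   because the trailing -1 sizes are treated as "dimension not present". */
static void resize_sentinel_demo(THCState *state)
{
	THCudaTensor *t = THCudaTensor_new(state);
	THCudaTensor_resize1d(state, t, 8);              /* 1-D tensor of 8 elements */
	THCudaTensor_resize4d(state, t, 8, -1, -1, -1);  /* equivalent call */
	THCudaTensor_free(state, t);
}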
Example #5
THFloatTensor *cudnn_SpatialConvolution_updateOutput(struct module *module, THFloatTensor *input)
{
	int kW = module->SpatialConvolution.kW;
	int kH = module->SpatialConvolution.kH;
	int dW = module->SpatialConvolution.dW;
	int dH = module->SpatialConvolution.dH;
	int padW = module->SpatialConvolution.padW;
	int padH = module->SpatialConvolution.padH;
	int nInputPlane  = module->SpatialConvolution.nInputPlane;
	int nOutputPlane = module->SpatialConvolution.nOutputPlane;

	THFloatTensor *weight = module->SpatialConvolution.weight;
	THFloatTensor *bias = module->SpatialConvolution.bias;
	THFloatTensor *output = module->output;

	int sizes[4];
	int pad[2], filterStride[2], upscale[2];
	cudnnTensorDescriptor_t dinput, dbias, doutput;
	cudnnConvolutionDescriptor_t dconv;
	cudnnFilterDescriptor_t dweight;
	float one = 1, zero = 0;
	size_t reqwssize;
	static void *ws;
	static size_t wssize;
	static const int alg = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM;

	pad[0] = padH;
	pad[1] = padW;
	filterStride[0] = dH;
	filterStride[1] = dW;
	upscale[0] = 1;
	upscale[1] = 1;

	if(input->nDimension <= 2)
	{
		// Here we use the SpatialConvolution module to perform a linear transformation:
		// a 1-D or 2-D input is described to cuDNN as a batch of nInputPlane x 1 x 1 images,
		// so the convolution acts as a fully connected layer
		errcheck(cudnnCreateTensorDescriptor(&dinput));
		if(input->nDimension == 1)
			errcheck(cudnnSetTensor4dDescriptor(dinput, CUDNN_TENSOR_NCHW, floattype, 1, input->size[0], 1, 1));
		else errcheck(cudnnSetTensor4dDescriptor(dinput, CUDNN_TENSOR_NCHW, floattype, input->size[0], input->size[1], 1, 1));
	} else errcheck(THcudnn_TensorDescriptor(&dinput, input));
	errcheck(cudnnCreateFilterDescriptor(&dweight));
	errcheck(cudnnSetFilter4dDescriptor(dweight, floattype, nOutputPlane, nInputPlane, kH, kW));
	errcheck(cudnnCreateTensorDescriptor(&dbias));
	errcheck(cudnnSetTensor4dDescriptor(dbias, CUDNN_TENSOR_NCHW, floattype, 1, bias->size[0], 1, 1));
	errcheck(cudnnCreateConvolutionDescriptor(&dconv));
	errcheck(cudnnSetConvolutionNdDescriptor(dconv, 2, pad, filterStride, upscale, CUDNN_CROSS_CORRELATION, floattype));
	errcheck(cudnnGetConvolutionNdForwardOutputDim(dconv, dinput, dweight, 4, sizes));
	THCudaTensor_resize4d(output, sizes[0], sizes[1], sizes[2], sizes[3]);
	errcheck(THcudnn_TensorDescriptor(&doutput, output));
	if(alg == CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM || alg == CUDNN_CONVOLUTION_FWD_ALGO_GEMM ||
		alg == CUDNN_CONVOLUTION_FWD_ALGO_FFT || alg == CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING)
	{
		// These algorithms need scratch space: query the required size and grow the cached workspace when it is too small
		errcheck(cudnnGetConvolutionForwardWorkspaceSize(THcudnn_getHandle(), dinput, dweight, dconv, doutput, alg, &reqwssize));
		if(reqwssize > wssize)
		{
			if(ws)
				cudaFree(ws);	// release the outgrown workspace to avoid leaking device memory
			wssize = reqwssize;
			errcheck(cudaMalloc(&ws, reqwssize));
		}
	}
	errcheck(cudnnConvolutionForward(THcudnn_getHandle(), &one, dinput, THFloatTensor_data(input),
		dweight, THFloatTensor_data(weight), dconv, alg, ws, wssize, &zero,
		doutput, THFloatTensor_data(output)));
	errcheck(cudnnAddTensor_v3(THcudnn_getHandle(), &one, dbias, THFloatTensor_data(bias),
		&one, doutput, THFloatTensor_data(output)));
	cudnnDestroyTensorDescriptor(dinput);
	cudnnDestroyFilterDescriptor(dweight);
	cudnnDestroyTensorDescriptor(dbias);
	cudnnDestroyTensorDescriptor(doutput);
	cudnnDestroyConvolutionDescriptor(dconv);
	return output;
}
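The static ws/wssize pair above is a grow-only cache for the convolution workspace that persists across calls. The same idiom, pulled out into a small standalone helper for clarity (workspace_get is a hypothetical name, not part of thnets or cuDNN; it also frees the outgrown buffer before reallocating):

#include <cuda_runtime.h>

/* Hypothetical helper: return a device buffer of at least `required` bytes,
   reusing the cached allocation when it is already large enough. */
static void *workspace_get(size_t required, void **cache, size_t *cachesize)
{
	if(required > *cachesize)
	{
		if(*cache)
			cudaFree(*cache);             /* drop the smaller buffer first */
		if(cudaMalloc(cache, required) != cudaSuccess)
		{
			*cache = 0;
			*cachesize = 0;
			return 0;                     /* allocation failed, caller must check */
		}
		*cachesize = required;
	}
	return *cache;
}

A caller would pass the addresses of the static ws and wssize variables and hand the returned pointer together with *cachesize to cudnnConvolutionForward.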